From eb0b3e78e6290e5a1bf970830939d5c8c9151892 Mon Sep 17 00:00:00 2001
From: Pan Xinhui <xinhuix.pan@intel.com>
Date: Tue, 7 Jul 2015 20:43:26 +0800
Subject: [PATCH 01/53] acpi-cpufreq: replace per_cpu with driver_data of
 policy

Drivers can store their internal per-policy information in
policy->driver_data, lets use it.

we have benefits after this replacing.
1) memory saving.
2) policy is shared by several cpus, per_cpu seems not correct. using
*driver_data* is more reasonable.
3) fix a memory leak in acpi_cpufreq_cpu_exit. as policy->cpu might
change during cpu hotplug. So sometimes we cant't free *data*, use
*driver_data* to fix it.
4) fix a zero return value of get_cur_freq_on_cpu. Only per_cpu of
policy->cpu is set to *data*, if we try to get cpufreq on other cpus, we
get zero instead of correct values. Use *driver_data* to fix it.

Signed-off-by: Pan Xinhui <xinhuix.pan@intel.com>
Acked-by: Viresh Kumar <viresh.kumar@linaro.org>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 drivers/cpufreq/acpi-cpufreq.c | 40 +++++++++++++++++++---------------
 1 file changed, 22 insertions(+), 18 deletions(-)

diff --git a/drivers/cpufreq/acpi-cpufreq.c b/drivers/cpufreq/acpi-cpufreq.c
index 0136dfcdabf0..e7fcaa6eedbd 100644
--- a/drivers/cpufreq/acpi-cpufreq.c
+++ b/drivers/cpufreq/acpi-cpufreq.c
@@ -72,8 +72,6 @@ struct acpi_cpufreq_data {
 	cpumask_var_t freqdomain_cpus;
 };
 
-static DEFINE_PER_CPU(struct acpi_cpufreq_data *, acfreq_data);
-
 /* acpi_perf_data is a pointer to percpu data. */
 static struct acpi_processor_performance __percpu *acpi_perf_data;
 
@@ -144,7 +142,7 @@ static int _store_boost(int val)
 
 static ssize_t show_freqdomain_cpus(struct cpufreq_policy *policy, char *buf)
 {
-	struct acpi_cpufreq_data *data = per_cpu(acfreq_data, policy->cpu);
+	struct acpi_cpufreq_data *data = policy->driver_data;
 
 	return cpufreq_show_cpus(data->freqdomain_cpus, buf);
 }
@@ -327,7 +325,8 @@ static void drv_write(struct drv_cmd *cmd)
 	put_cpu();
 }
 
-static u32 get_cur_val(const struct cpumask *mask)
+static u32
+get_cur_val(const struct cpumask *mask, struct acpi_cpufreq_data *data)
 {
 	struct acpi_processor_performance *perf;
 	struct drv_cmd cmd;
@@ -335,7 +334,7 @@ static u32 get_cur_val(const struct cpumask *mask)
 	if (unlikely(cpumask_empty(mask)))
 		return 0;
 
-	switch (per_cpu(acfreq_data, cpumask_first(mask))->cpu_feature) {
+	switch (data->cpu_feature) {
 	case SYSTEM_INTEL_MSR_CAPABLE:
 		cmd.type = SYSTEM_INTEL_MSR_CAPABLE;
 		cmd.addr.msr.reg = MSR_IA32_PERF_CTL;
@@ -346,7 +345,7 @@ static u32 get_cur_val(const struct cpumask *mask)
 		break;
 	case SYSTEM_IO_CAPABLE:
 		cmd.type = SYSTEM_IO_CAPABLE;
-		perf = per_cpu(acfreq_data, cpumask_first(mask))->acpi_data;
+		perf = data->acpi_data;
 		cmd.addr.io.port = perf->control_register.address;
 		cmd.addr.io.bit_width = perf->control_register.bit_width;
 		break;
@@ -364,19 +363,24 @@ static u32 get_cur_val(const struct cpumask *mask)
 
 static unsigned int get_cur_freq_on_cpu(unsigned int cpu)
 {
-	struct acpi_cpufreq_data *data = per_cpu(acfreq_data, cpu);
+	struct acpi_cpufreq_data *data;
+	struct cpufreq_policy *policy;
 	unsigned int freq;
 	unsigned int cached_freq;
 
 	pr_debug("get_cur_freq_on_cpu (%d)\n", cpu);
 
-	if (unlikely(data == NULL ||
-		     data->acpi_data == NULL || data->freq_table == NULL)) {
+	policy = cpufreq_cpu_get(cpu);
+	if (unlikely(!policy))
+		return 0;
+
+	data = policy->driver_data;
+	cpufreq_cpu_put(policy);
+	if (unlikely(!data || !data->acpi_data || !data->freq_table))
 		return 0;
-	}
 
 	cached_freq = data->freq_table[data->acpi_data->state].frequency;
-	freq = extract_freq(get_cur_val(cpumask_of(cpu)), data);
+	freq = extract_freq(get_cur_val(cpumask_of(cpu), data), data);
 	if (freq != cached_freq) {
 		/*
 		 * The dreaded BIOS frequency change behind our back.
@@ -397,7 +401,7 @@ static unsigned int check_freqs(const struct cpumask *mask, unsigned int freq,
 	unsigned int i;
 
 	for (i = 0; i < 100; i++) {
-		cur_freq = extract_freq(get_cur_val(mask), data);
+		cur_freq = extract_freq(get_cur_val(mask, data), data);
 		if (cur_freq == freq)
 			return 1;
 		udelay(10);
@@ -408,7 +412,7 @@ static unsigned int check_freqs(const struct cpumask *mask, unsigned int freq,
 static int acpi_cpufreq_target(struct cpufreq_policy *policy,
 			       unsigned int index)
 {
-	struct acpi_cpufreq_data *data = per_cpu(acfreq_data, policy->cpu);
+	struct acpi_cpufreq_data *data = policy->driver_data;
 	struct acpi_processor_performance *perf;
 	struct drv_cmd cmd;
 	unsigned int next_perf_state = 0; /* Index into perf table */
@@ -673,7 +677,7 @@ static int acpi_cpufreq_cpu_init(struct cpufreq_policy *policy)
 	}
 
 	data->acpi_data = per_cpu_ptr(acpi_perf_data, cpu);
-	per_cpu(acfreq_data, cpu) = data;
+	policy->driver_data = data;
 
 	if (cpu_has(c, X86_FEATURE_CONSTANT_TSC))
 		acpi_cpufreq_driver.flags |= CPUFREQ_CONST_LOOPS;
@@ -843,19 +847,19 @@ err_free_mask:
 	free_cpumask_var(data->freqdomain_cpus);
 err_free:
 	kfree(data);
-	per_cpu(acfreq_data, cpu) = NULL;
+	policy->driver_data = NULL;
 
 	return result;
 }
 
 static int acpi_cpufreq_cpu_exit(struct cpufreq_policy *policy)
 {
-	struct acpi_cpufreq_data *data = per_cpu(acfreq_data, policy->cpu);
+	struct acpi_cpufreq_data *data = policy->driver_data;
 
 	pr_debug("acpi_cpufreq_cpu_exit\n");
 
 	if (data) {
-		per_cpu(acfreq_data, policy->cpu) = NULL;
+		policy->driver_data = NULL;
 		acpi_processor_unregister_performance(data->acpi_data,
 						      policy->cpu);
 		free_cpumask_var(data->freqdomain_cpus);
@@ -868,7 +872,7 @@ static int acpi_cpufreq_cpu_exit(struct cpufreq_policy *policy)
 
 static int acpi_cpufreq_resume(struct cpufreq_policy *policy)
 {
-	struct acpi_cpufreq_data *data = per_cpu(acfreq_data, policy->cpu);
+	struct acpi_cpufreq_data *data = policy->driver_data;
 
 	pr_debug("acpi_cpufreq_resume\n");
 

From 8101f99703048ceaa31c756abe1098d099249ad9 Mon Sep 17 00:00:00 2001
From: Viresh Kumar <viresh.kumar@linaro.org>
Date: Wed, 8 Jul 2015 15:12:15 +0530
Subject: [PATCH 02/53] cpufreq: cpufreq_add_dev: name goto labels based on
 what they do

These labels are are named in two ways normally:
 - Based on what caused to jump to such labels
 - Based on what we do under such labels

We follow the first naming convention today and that leads to multiple
labels for doing the same work. Fix it by switching to the second way of
naming them.

Signed-off-by: Viresh Kumar <viresh.kumar@linaro.org>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 drivers/cpufreq/cpufreq.c | 15 +++++++--------
 1 file changed, 7 insertions(+), 8 deletions(-)

diff --git a/drivers/cpufreq/cpufreq.c b/drivers/cpufreq/cpufreq.c
index 26063afb3eba..702777b1d645 100644
--- a/drivers/cpufreq/cpufreq.c
+++ b/drivers/cpufreq/cpufreq.c
@@ -1288,7 +1288,7 @@ static int cpufreq_add_dev(struct device *dev, struct subsys_interface *sif)
 		recover_policy = false;
 		policy = cpufreq_policy_alloc(dev);
 		if (!policy)
-			goto nomem_out;
+			goto out_release_rwsem;
 	}
 
 	cpumask_copy(policy->cpus, cpumask_of(cpu));
@@ -1299,7 +1299,7 @@ static int cpufreq_add_dev(struct device *dev, struct subsys_interface *sif)
 	ret = cpufreq_driver->init(policy);
 	if (ret) {
 		pr_debug("initialization failed\n");
-		goto err_set_policy_cpu;
+		goto out_free_policy;
 	}
 
 	down_write(&policy->rwsem);
@@ -1327,7 +1327,7 @@ static int cpufreq_add_dev(struct device *dev, struct subsys_interface *sif)
 		policy->cur = cpufreq_driver->get(policy->cpu);
 		if (!policy->cur) {
 			pr_err("%s: ->get() failed\n", __func__);
-			goto err_get_freq;
+			goto out_exit_policy;
 		}
 	}
 
@@ -1377,7 +1377,7 @@ static int cpufreq_add_dev(struct device *dev, struct subsys_interface *sif)
 	if (!recover_policy) {
 		ret = cpufreq_add_dev_interface(policy, dev);
 		if (ret)
-			goto err_out_unregister;
+			goto out_exit_policy;
 		blocking_notifier_call_chain(&cpufreq_policy_notifier_list,
 				CPUFREQ_CREATE_POLICY, policy);
 
@@ -1406,15 +1406,14 @@ static int cpufreq_add_dev(struct device *dev, struct subsys_interface *sif)
 
 	return 0;
 
-err_out_unregister:
-err_get_freq:
+out_exit_policy:
 	up_write(&policy->rwsem);
 
 	if (cpufreq_driver->exit)
 		cpufreq_driver->exit(policy);
-err_set_policy_cpu:
+out_free_policy:
 	cpufreq_policy_free(policy, recover_policy);
-nomem_out:
+out_release_rwsem:
 	up_read(&cpufreq_rwsem);
 
 	return ret;

From 7f0fa40f5a587c2a026de776cc6a26373ac0f244 Mon Sep 17 00:00:00 2001
From: Viresh Kumar <viresh.kumar@linaro.org>
Date: Wed, 8 Jul 2015 15:12:16 +0530
Subject: [PATCH 03/53] cpufreq: Properly handle errors from
 cpufreq_init_policy()

cpufreq_init_policy() can fail, and we don't do anything except a call
to ->exit() on that. The policy should be freed if this happens.

Do it properly.

Reported-and-tested-by: "Jon Medhurst (Tixy)" <tixy@linaro.org>
Signed-off-by: Viresh Kumar <viresh.kumar@linaro.org>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 drivers/cpufreq/cpufreq.c | 20 +++++++++++---------
 1 file changed, 11 insertions(+), 9 deletions(-)

diff --git a/drivers/cpufreq/cpufreq.c b/drivers/cpufreq/cpufreq.c
index 702777b1d645..a7b6ac6e048e 100644
--- a/drivers/cpufreq/cpufreq.c
+++ b/drivers/cpufreq/cpufreq.c
@@ -1060,11 +1060,10 @@ static int cpufreq_add_dev_interface(struct cpufreq_policy *policy,
 	return cpufreq_add_dev_symlink(policy);
 }
 
-static void cpufreq_init_policy(struct cpufreq_policy *policy)
+static int cpufreq_init_policy(struct cpufreq_policy *policy)
 {
 	struct cpufreq_governor *gov = NULL;
 	struct cpufreq_policy new_policy;
-	int ret = 0;
 
 	memcpy(&new_policy, policy, sizeof(*policy));
 
@@ -1083,12 +1082,7 @@ static void cpufreq_init_policy(struct cpufreq_policy *policy)
 		cpufreq_parse_governor(gov->name, &new_policy.policy, NULL);
 
 	/* set default policy */
-	ret = cpufreq_set_policy(policy, &new_policy);
-	if (ret) {
-		pr_debug("setting policy failed\n");
-		if (cpufreq_driver->exit)
-			cpufreq_driver->exit(policy);
-	}
+	return cpufreq_set_policy(policy, &new_policy);
 }
 
 static int cpufreq_add_policy_cpu(struct cpufreq_policy *policy,
@@ -1386,7 +1380,12 @@ static int cpufreq_add_dev(struct device *dev, struct subsys_interface *sif)
 		write_unlock_irqrestore(&cpufreq_driver_lock, flags);
 	}
 
-	cpufreq_init_policy(policy);
+	ret = cpufreq_init_policy(policy);
+	if (ret) {
+		pr_err("%s: Failed to initialize policy for cpu: %d (%d)\n",
+		       __func__, cpu, ret);
+		goto out_remove_policy_notify;
+	}
 
 	if (!recover_policy) {
 		policy->user_policy.policy = policy->policy;
@@ -1406,6 +1405,9 @@ static int cpufreq_add_dev(struct device *dev, struct subsys_interface *sif)
 
 	return 0;
 
+out_remove_policy_notify:
+	/* cpufreq_policy_free() will notify based on this */
+	recover_policy = true;
 out_exit_policy:
 	up_write(&policy->rwsem);
 

From 8cfcfd39000d54188e0df1e0fafe63f53897b62a Mon Sep 17 00:00:00 2001
From: Pan Xinhui <xinhuix.pan@intel.com>
Date: Fri, 10 Jul 2015 14:36:20 +0800
Subject: [PATCH 04/53] acpi-cpufreq: Fix an ACPI perf unregister issue

As policy->cpu may not be same in acpi_cpufreq_cpu_init() and
acpi_cpufreq_cpu_exit(). There is a risk that we use different CPU
to un/register ACPI performance. So acpi_processor_unregister_performance()
may not be able to do the cleanup work. That causes a memory leak. And
if there will be another acpi_processor_register_performance() call,
it may also fail thanks to the internal check of pr->performace.

So add a new struct acpi_cpufreq_data field, acpi_perf_cpu, to fix
this issue.

Signed-off-by: Pan Xinhui <xinhuix.pan@intel.com>
Acked-by: Viresh Kumar <viresh.kumar@linaro.org>
[ rjw: Changelog ]
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 drivers/cpufreq/acpi-cpufreq.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/drivers/cpufreq/acpi-cpufreq.c b/drivers/cpufreq/acpi-cpufreq.c
index e7fcaa6eedbd..de54ce14eb39 100644
--- a/drivers/cpufreq/acpi-cpufreq.c
+++ b/drivers/cpufreq/acpi-cpufreq.c
@@ -69,6 +69,7 @@ struct acpi_cpufreq_data {
 	struct cpufreq_frequency_table *freq_table;
 	unsigned int resume;
 	unsigned int cpu_feature;
+	unsigned int acpi_perf_cpu;
 	cpumask_var_t freqdomain_cpus;
 };
 
@@ -677,6 +678,7 @@ static int acpi_cpufreq_cpu_init(struct cpufreq_policy *policy)
 	}
 
 	data->acpi_data = per_cpu_ptr(acpi_perf_data, cpu);
+	data->acpi_perf_cpu = cpu;
 	policy->driver_data = data;
 
 	if (cpu_has(c, X86_FEATURE_CONSTANT_TSC))
@@ -861,7 +863,7 @@ static int acpi_cpufreq_cpu_exit(struct cpufreq_policy *policy)
 	if (data) {
 		policy->driver_data = NULL;
 		acpi_processor_unregister_performance(data->acpi_data,
-						      policy->cpu);
+						      data->acpi_perf_cpu);
 		free_cpumask_var(data->freqdomain_cpus);
 		kfree(data->freq_table);
 		kfree(data);

From 3a5f5b2e3b4ba165e342c4e969c7fa8d85be0d94 Mon Sep 17 00:00:00 2001
From: Cristian Ardelean <cristian97.ardelean@gmail.com>
Date: Fri, 10 Jul 2015 14:42:15 -0400
Subject: [PATCH 05/53] cpufreq: integrator: fixed coding style issues

Fixed coding style issues found by checkpatch.pl tool. Changed
space indentation to tab, removed unneccesary braces, removed
space between MODULE macros and parentheses.

REMARKS: failed to 'make' this file with error message
'fatal error: asm/mach-types.h: No such file or directory'.

Signed-off-by: Cristian Ardelean <cristian97.ardelean@gmail.com>
Acked-by: Viresh Kumar <viresh.kumar@linaro.org>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 drivers/cpufreq/integrator-cpufreq.c | 18 ++++++++----------
 1 file changed, 8 insertions(+), 10 deletions(-)

diff --git a/drivers/cpufreq/integrator-cpufreq.c b/drivers/cpufreq/integrator-cpufreq.c
index 129e266f7621..2faa4216bf2a 100644
--- a/drivers/cpufreq/integrator-cpufreq.c
+++ b/drivers/cpufreq/integrator-cpufreq.c
@@ -98,11 +98,10 @@ static int integrator_set_target(struct cpufreq_policy *policy,
 	/* get current setting */
 	cm_osc = __raw_readl(cm_base + INTEGRATOR_HDR_OSC_OFFSET);
 
-	if (machine_is_integrator()) {
+	if (machine_is_integrator())
 		vco.s = (cm_osc >> 8) & 7;
-	} else if (machine_is_cintegrator()) {
+	else if (machine_is_cintegrator())
 		vco.s = 1;
-	}
 	vco.v = cm_osc & 255;
 	vco.r = 22;
 	freqs.old = icst_hz(&cclk_params, vco) / 1000;
@@ -163,11 +162,10 @@ static unsigned int integrator_get(unsigned int cpu)
 	/* detect memory etc. */
 	cm_osc = __raw_readl(cm_base + INTEGRATOR_HDR_OSC_OFFSET);
 
-	if (machine_is_integrator()) {
+	if (machine_is_integrator())
 		vco.s = (cm_osc >> 8) & 7;
-	} else {
+	else
 		vco.s = 1;
-	}
 	vco.v = cm_osc & 255;
 	vco.r = 22;
 
@@ -203,7 +201,7 @@ static int __init integrator_cpufreq_probe(struct platform_device *pdev)
 	struct resource *res;
 
 	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
-        if (!res)
+	if (!res)
 		return -ENODEV;
 
 	cm_base = devm_ioremap(&pdev->dev, res->start, resource_size(res));
@@ -234,6 +232,6 @@ static struct platform_driver integrator_cpufreq_driver = {
 module_platform_driver_probe(integrator_cpufreq_driver,
 			     integrator_cpufreq_probe);
 
-MODULE_AUTHOR ("Russell M. King");
-MODULE_DESCRIPTION ("cpufreq driver for ARM Integrator CPUs");
-MODULE_LICENSE ("GPL");
+MODULE_AUTHOR("Russell M. King");
+MODULE_DESCRIPTION("cpufreq driver for ARM Integrator CPUs");
+MODULE_LICENSE("GPL");

From ba88d4338f226766f510e207911dde8c1875e072 Mon Sep 17 00:00:00 2001
From: Kristen Carlson Accardi <kristen@linux.intel.com>
Date: Tue, 14 Jul 2015 09:46:23 -0700
Subject: [PATCH 06/53] intel_pstate: enable HWP per CPU

HWP previously was only enabled at driver load time, on the boot
CPU, however, HWP must be enabled per package. Move the code to
enable HWP to the cpufreq driver init path so that it will be
called per CPU.

Signed-off-by: Kristen Carlson Accardi <kristen@linux.intel.com>
Tested-by: David Zhuang <david.zhuang@oracle.com>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 drivers/cpufreq/intel_pstate.c | 11 +++++++----
 1 file changed, 7 insertions(+), 4 deletions(-)

diff --git a/drivers/cpufreq/intel_pstate.c b/drivers/cpufreq/intel_pstate.c
index 15ada47bb720..763d8f34ab49 100644
--- a/drivers/cpufreq/intel_pstate.c
+++ b/drivers/cpufreq/intel_pstate.c
@@ -484,12 +484,11 @@ static void __init intel_pstate_sysfs_expose_params(void)
 }
 /************************** sysfs end ************************/
 
-static void intel_pstate_hwp_enable(void)
+static void intel_pstate_hwp_enable(struct cpudata *cpudata)
 {
-	hwp_active++;
 	pr_info("intel_pstate: HWP enabled\n");
 
-	wrmsrl( MSR_PM_ENABLE, 0x1);
+	wrmsrl_on_cpu(cpudata->cpu, MSR_PM_ENABLE, 0x1);
 }
 
 static int byt_get_min_pstate(void)
@@ -932,6 +931,10 @@ static int intel_pstate_init_cpu(unsigned int cpunum)
 	cpu = all_cpu_data[cpunum];
 
 	cpu->cpu = cpunum;
+
+	if (hwp_active)
+		intel_pstate_hwp_enable(cpu);
+
 	intel_pstate_get_cpu_pstates(cpu);
 
 	init_timer_deferrable(&cpu->timer);
@@ -1245,7 +1248,7 @@ static int __init intel_pstate_init(void)
 		return -ENOMEM;
 
 	if (static_cpu_has_safe(X86_FEATURE_HWP) && !no_hwp)
-		intel_pstate_hwp_enable();
+		hwp_active++;
 
 	if (!hwp_active && hwp_only)
 		goto out;

From 386d46e6d5238c9648399eb1e0c418d06f4126a2 Mon Sep 17 00:00:00 2001
From: Viresh Kumar <viresh.kumar@linaro.org>
Date: Fri, 19 Jun 2015 17:18:01 +0530
Subject: [PATCH 07/53] cpufreq: governor: Name delayed-work as dwork

Delayed work was named as 'work' and to access work within it we do
work.work. Not much readable. Rename delayed_work as 'dwork'.

Reviewed-by: Preeti U Murthy <preeti@linux.vnet.ibm.com>
Signed-off-by: Viresh Kumar <viresh.kumar@linaro.org>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 drivers/cpufreq/cpufreq_conservative.c | 2 +-
 drivers/cpufreq/cpufreq_governor.c     | 6 +++---
 drivers/cpufreq/cpufreq_governor.h     | 2 +-
 drivers/cpufreq/cpufreq_ondemand.c     | 8 ++++----
 4 files changed, 9 insertions(+), 9 deletions(-)

diff --git a/drivers/cpufreq/cpufreq_conservative.c b/drivers/cpufreq/cpufreq_conservative.c
index c86a10c30912..0e3ec1d968d9 100644
--- a/drivers/cpufreq/cpufreq_conservative.c
+++ b/drivers/cpufreq/cpufreq_conservative.c
@@ -105,7 +105,7 @@ static void cs_check_cpu(int cpu, unsigned int load)
 static void cs_dbs_timer(struct work_struct *work)
 {
 	struct cs_cpu_dbs_info_s *dbs_info = container_of(work,
-			struct cs_cpu_dbs_info_s, cdbs.work.work);
+			struct cs_cpu_dbs_info_s, cdbs.dwork.work);
 	unsigned int cpu = dbs_info->cdbs.cur_policy->cpu;
 	struct cs_cpu_dbs_info_s *core_dbs_info = &per_cpu(cs_cpu_dbs_info,
 			cpu);
diff --git a/drivers/cpufreq/cpufreq_governor.c b/drivers/cpufreq/cpufreq_governor.c
index 57a39f8a92b7..6024bbc782c0 100644
--- a/drivers/cpufreq/cpufreq_governor.c
+++ b/drivers/cpufreq/cpufreq_governor.c
@@ -165,7 +165,7 @@ static inline void __gov_queue_work(int cpu, struct dbs_data *dbs_data,
 {
 	struct cpu_dbs_common_info *cdbs = dbs_data->cdata->get_cpu_cdbs(cpu);
 
-	mod_delayed_work_on(cpu, system_wq, &cdbs->work, delay);
+	mod_delayed_work_on(cpu, system_wq, &cdbs->dwork, delay);
 }
 
 void gov_queue_work(struct dbs_data *dbs_data, struct cpufreq_policy *policy,
@@ -204,7 +204,7 @@ static inline void gov_cancel_work(struct dbs_data *dbs_data,
 
 	for_each_cpu(i, policy->cpus) {
 		cdbs = dbs_data->cdata->get_cpu_cdbs(i);
-		cancel_delayed_work_sync(&cdbs->work);
+		cancel_delayed_work_sync(&cdbs->dwork);
 	}
 }
 
@@ -367,7 +367,7 @@ static int cpufreq_governor_start(struct cpufreq_policy *policy,
 			j_cdbs->prev_cpu_nice = kcpustat_cpu(j).cpustat[CPUTIME_NICE];
 
 		mutex_init(&j_cdbs->timer_mutex);
-		INIT_DEFERRABLE_WORK(&j_cdbs->work, cdata->gov_dbs_timer);
+		INIT_DEFERRABLE_WORK(&j_cdbs->dwork, cdata->gov_dbs_timer);
 	}
 
 	if (cdata->governor == GOV_CONSERVATIVE) {
diff --git a/drivers/cpufreq/cpufreq_governor.h b/drivers/cpufreq/cpufreq_governor.h
index 34736f5e869d..352eecaae789 100644
--- a/drivers/cpufreq/cpufreq_governor.h
+++ b/drivers/cpufreq/cpufreq_governor.h
@@ -142,7 +142,7 @@ struct cpu_dbs_common_info {
 	 */
 	unsigned int prev_load;
 	struct cpufreq_policy *cur_policy;
-	struct delayed_work work;
+	struct delayed_work dwork;
 	/*
 	 * percpu mutex that serializes governor limit change with gov_dbs_timer
 	 * invocation. We do not want gov_dbs_timer to run when user is changing
diff --git a/drivers/cpufreq/cpufreq_ondemand.c b/drivers/cpufreq/cpufreq_ondemand.c
index 3c1e10f2304c..830aef1db8c3 100644
--- a/drivers/cpufreq/cpufreq_ondemand.c
+++ b/drivers/cpufreq/cpufreq_ondemand.c
@@ -194,7 +194,7 @@ static void od_check_cpu(int cpu, unsigned int load)
 static void od_dbs_timer(struct work_struct *work)
 {
 	struct od_cpu_dbs_info_s *dbs_info =
-		container_of(work, struct od_cpu_dbs_info_s, cdbs.work.work);
+		container_of(work, struct od_cpu_dbs_info_s, cdbs.dwork.work);
 	unsigned int cpu = dbs_info->cdbs.cur_policy->cpu;
 	struct od_cpu_dbs_info_s *core_dbs_info = &per_cpu(od_cpu_dbs_info,
 			cpu);
@@ -275,18 +275,18 @@ static void update_sampling_rate(struct dbs_data *dbs_data,
 
 		mutex_lock(&dbs_info->cdbs.timer_mutex);
 
-		if (!delayed_work_pending(&dbs_info->cdbs.work)) {
+		if (!delayed_work_pending(&dbs_info->cdbs.dwork)) {
 			mutex_unlock(&dbs_info->cdbs.timer_mutex);
 			continue;
 		}
 
 		next_sampling = jiffies + usecs_to_jiffies(new_rate);
-		appointed_at = dbs_info->cdbs.work.timer.expires;
+		appointed_at = dbs_info->cdbs.dwork.timer.expires;
 
 		if (time_before(next_sampling, appointed_at)) {
 
 			mutex_unlock(&dbs_info->cdbs.timer_mutex);
-			cancel_delayed_work_sync(&dbs_info->cdbs.work);
+			cancel_delayed_work_sync(&dbs_info->cdbs.dwork);
 			mutex_lock(&dbs_info->cdbs.timer_mutex);
 
 			gov_queue_work(dbs_data, dbs_info->cdbs.cur_policy,

From d3574c851148266177ea9ecae10a317e6eae94de Mon Sep 17 00:00:00 2001
From: Viresh Kumar <viresh.kumar@linaro.org>
Date: Fri, 19 Jun 2015 17:18:02 +0530
Subject: [PATCH 08/53] cpufreq: governor: Drop unused field 'cpu'

Its not used at all, drop it.

Reviewed-by: Preeti U Murthy <preeti@linux.vnet.ibm.com>
Signed-off-by: Viresh Kumar <viresh.kumar@linaro.org>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 drivers/cpufreq/cpufreq_governor.c | 1 -
 drivers/cpufreq/cpufreq_governor.h | 1 -
 2 files changed, 2 deletions(-)

diff --git a/drivers/cpufreq/cpufreq_governor.c b/drivers/cpufreq/cpufreq_governor.c
index 6024bbc782c0..2149ba7d32a8 100644
--- a/drivers/cpufreq/cpufreq_governor.c
+++ b/drivers/cpufreq/cpufreq_governor.c
@@ -353,7 +353,6 @@ static int cpufreq_governor_start(struct cpufreq_policy *policy,
 		struct cpu_dbs_common_info *j_cdbs = cdata->get_cpu_cdbs(j);
 		unsigned int prev_load;
 
-		j_cdbs->cpu = j;
 		j_cdbs->cur_policy = policy;
 		j_cdbs->prev_cpu_idle =
 			get_cpu_idle_time(j, &j_cdbs->prev_cpu_wall, io_busy);
diff --git a/drivers/cpufreq/cpufreq_governor.h b/drivers/cpufreq/cpufreq_governor.h
index 352eecaae789..1bbf8c87fdd5 100644
--- a/drivers/cpufreq/cpufreq_governor.h
+++ b/drivers/cpufreq/cpufreq_governor.h
@@ -130,7 +130,6 @@ static void *get_cpu_dbs_info_s(int cpu)				\
 
 /* Per cpu structures */
 struct cpu_dbs_common_info {
-	int cpu;
 	u64 prev_cpu_idle;
 	u64 prev_cpu_wall;
 	u64 prev_cpu_nice;

From 875b8508f9607b92e3ef4ece2fddf86d61351085 Mon Sep 17 00:00:00 2001
From: Viresh Kumar <viresh.kumar@linaro.org>
Date: Fri, 19 Jun 2015 17:18:03 +0530
Subject: [PATCH 09/53] cpufreq: governor: Rename 'cpu_dbs_common_info' to
 'cpu_dbs_info'

Its not common info to all CPUs, but a structure representing common
type of cpu info to both governor types. Lets drop 'common_' from its
name.

Reviewed-by: Preeti U Murthy <preeti@linux.vnet.ibm.com>
Signed-off-by: Viresh Kumar <viresh.kumar@linaro.org>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 drivers/cpufreq/cpufreq_governor.c | 19 +++++++++----------
 drivers/cpufreq/cpufreq_governor.h | 13 ++++++-------
 2 files changed, 15 insertions(+), 17 deletions(-)

diff --git a/drivers/cpufreq/cpufreq_governor.c b/drivers/cpufreq/cpufreq_governor.c
index 2149ba7d32a8..529f236f2d05 100644
--- a/drivers/cpufreq/cpufreq_governor.c
+++ b/drivers/cpufreq/cpufreq_governor.c
@@ -32,7 +32,7 @@ static struct attribute_group *get_sysfs_attr(struct dbs_data *dbs_data)
 
 void dbs_check_cpu(struct dbs_data *dbs_data, int cpu)
 {
-	struct cpu_dbs_common_info *cdbs = dbs_data->cdata->get_cpu_cdbs(cpu);
+	struct cpu_dbs_info *cdbs = dbs_data->cdata->get_cpu_cdbs(cpu);
 	struct od_dbs_tuners *od_tuners = dbs_data->tuners;
 	struct cs_dbs_tuners *cs_tuners = dbs_data->tuners;
 	struct cpufreq_policy *policy;
@@ -64,7 +64,7 @@ void dbs_check_cpu(struct dbs_data *dbs_data, int cpu)
 
 	/* Get Absolute Load */
 	for_each_cpu(j, policy->cpus) {
-		struct cpu_dbs_common_info *j_cdbs;
+		struct cpu_dbs_info *j_cdbs;
 		u64 cur_wall_time, cur_idle_time;
 		unsigned int idle_time, wall_time;
 		unsigned int load;
@@ -163,7 +163,7 @@ EXPORT_SYMBOL_GPL(dbs_check_cpu);
 static inline void __gov_queue_work(int cpu, struct dbs_data *dbs_data,
 		unsigned int delay)
 {
-	struct cpu_dbs_common_info *cdbs = dbs_data->cdata->get_cpu_cdbs(cpu);
+	struct cpu_dbs_info *cdbs = dbs_data->cdata->get_cpu_cdbs(cpu);
 
 	mod_delayed_work_on(cpu, system_wq, &cdbs->dwork, delay);
 }
@@ -199,7 +199,7 @@ EXPORT_SYMBOL_GPL(gov_queue_work);
 static inline void gov_cancel_work(struct dbs_data *dbs_data,
 		struct cpufreq_policy *policy)
 {
-	struct cpu_dbs_common_info *cdbs;
+	struct cpu_dbs_info *cdbs;
 	int i;
 
 	for_each_cpu(i, policy->cpus) {
@@ -209,8 +209,7 @@ static inline void gov_cancel_work(struct dbs_data *dbs_data,
 }
 
 /* Will return if we need to evaluate cpu load again or not */
-bool need_load_eval(struct cpu_dbs_common_info *cdbs,
-		unsigned int sampling_rate)
+bool need_load_eval(struct cpu_dbs_info *cdbs, unsigned int sampling_rate)
 {
 	if (policy_is_shared(cdbs->cur_policy)) {
 		ktime_t time_now = ktime_get();
@@ -330,7 +329,7 @@ static int cpufreq_governor_start(struct cpufreq_policy *policy,
 {
 	struct common_dbs_data *cdata = dbs_data->cdata;
 	unsigned int sampling_rate, ignore_nice, j, cpu = policy->cpu;
-	struct cpu_dbs_common_info *cpu_cdbs = cdata->get_cpu_cdbs(cpu);
+	struct cpu_dbs_info *cpu_cdbs = cdata->get_cpu_cdbs(cpu);
 	int io_busy = 0;
 
 	if (!policy->cur)
@@ -350,7 +349,7 @@ static int cpufreq_governor_start(struct cpufreq_policy *policy,
 	}
 
 	for_each_cpu(j, policy->cpus) {
-		struct cpu_dbs_common_info *j_cdbs = cdata->get_cpu_cdbs(j);
+		struct cpu_dbs_info *j_cdbs = cdata->get_cpu_cdbs(j);
 		unsigned int prev_load;
 
 		j_cdbs->cur_policy = policy;
@@ -398,7 +397,7 @@ static void cpufreq_governor_stop(struct cpufreq_policy *policy,
 {
 	struct common_dbs_data *cdata = dbs_data->cdata;
 	unsigned int cpu = policy->cpu;
-	struct cpu_dbs_common_info *cpu_cdbs = cdata->get_cpu_cdbs(cpu);
+	struct cpu_dbs_info *cpu_cdbs = cdata->get_cpu_cdbs(cpu);
 
 	if (cdata->governor == GOV_CONSERVATIVE) {
 		struct cs_cpu_dbs_info_s *cs_dbs_info =
@@ -418,7 +417,7 @@ static void cpufreq_governor_limits(struct cpufreq_policy *policy,
 {
 	struct common_dbs_data *cdata = dbs_data->cdata;
 	unsigned int cpu = policy->cpu;
-	struct cpu_dbs_common_info *cpu_cdbs = cdata->get_cpu_cdbs(cpu);
+	struct cpu_dbs_info *cpu_cdbs = cdata->get_cpu_cdbs(cpu);
 
 	if (!cpu_cdbs->cur_policy)
 		return;
diff --git a/drivers/cpufreq/cpufreq_governor.h b/drivers/cpufreq/cpufreq_governor.h
index 1bbf8c87fdd5..6b5e33f68064 100644
--- a/drivers/cpufreq/cpufreq_governor.h
+++ b/drivers/cpufreq/cpufreq_governor.h
@@ -109,7 +109,7 @@ store_one(_gov, file_name)
 
 /* create helper routines */
 #define define_get_cpu_dbs_routines(_dbs_info)				\
-static struct cpu_dbs_common_info *get_cpu_cdbs(int cpu)		\
+static struct cpu_dbs_info *get_cpu_cdbs(int cpu)			\
 {									\
 	return &per_cpu(_dbs_info, cpu).cdbs;				\
 }									\
@@ -129,7 +129,7 @@ static void *get_cpu_dbs_info_s(int cpu)				\
  */
 
 /* Per cpu structures */
-struct cpu_dbs_common_info {
+struct cpu_dbs_info {
 	u64 prev_cpu_idle;
 	u64 prev_cpu_wall;
 	u64 prev_cpu_nice;
@@ -152,7 +152,7 @@ struct cpu_dbs_common_info {
 };
 
 struct od_cpu_dbs_info_s {
-	struct cpu_dbs_common_info cdbs;
+	struct cpu_dbs_info cdbs;
 	struct cpufreq_frequency_table *freq_table;
 	unsigned int freq_lo;
 	unsigned int freq_lo_jiffies;
@@ -162,7 +162,7 @@ struct od_cpu_dbs_info_s {
 };
 
 struct cs_cpu_dbs_info_s {
-	struct cpu_dbs_common_info cdbs;
+	struct cpu_dbs_info cdbs;
 	unsigned int down_skip;
 	unsigned int requested_freq;
 	unsigned int enable:1;
@@ -203,7 +203,7 @@ struct common_dbs_data {
 	 */
 	struct dbs_data *gdbs_data;
 
-	struct cpu_dbs_common_info *(*get_cpu_cdbs)(int cpu);
+	struct cpu_dbs_info *(*get_cpu_cdbs)(int cpu);
 	void *(*get_cpu_dbs_info_s)(int cpu);
 	void (*gov_dbs_timer)(struct work_struct *work);
 	void (*gov_check_cpu)(int cpu, unsigned int load);
@@ -264,8 +264,7 @@ static ssize_t show_sampling_rate_min_gov_pol				\
 extern struct mutex cpufreq_governor_lock;
 
 void dbs_check_cpu(struct dbs_data *dbs_data, int cpu);
-bool need_load_eval(struct cpu_dbs_common_info *cdbs,
-		unsigned int sampling_rate);
+bool need_load_eval(struct cpu_dbs_info *cdbs, unsigned int sampling_rate);
 int cpufreq_governor_dbs(struct cpufreq_policy *policy,
 		struct common_dbs_data *cdata, unsigned int event);
 void gov_queue_work(struct dbs_data *dbs_data, struct cpufreq_policy *policy,

From 49a9a40c1b48d24f0fd9a6b6be8a4038f47d13bf Mon Sep 17 00:00:00 2001
From: Viresh Kumar <viresh.kumar@linaro.org>
Date: Fri, 19 Jun 2015 17:18:04 +0530
Subject: [PATCH 10/53] cpufreq: governor: name pointer to cpu_dbs_info as
 'cdbs'

It is called as 'cdbs' at most of the places and 'cpu_dbs' at others.
Lets use 'cdbs' consistently for better readability.

Reviewed-by: Preeti U Murthy <preeti@linux.vnet.ibm.com>
Signed-off-by: Viresh Kumar <viresh.kumar@linaro.org>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 drivers/cpufreq/cpufreq_governor.c | 26 +++++++++++++-------------
 1 file changed, 13 insertions(+), 13 deletions(-)

diff --git a/drivers/cpufreq/cpufreq_governor.c b/drivers/cpufreq/cpufreq_governor.c
index 529f236f2d05..4ea13f182154 100644
--- a/drivers/cpufreq/cpufreq_governor.c
+++ b/drivers/cpufreq/cpufreq_governor.c
@@ -329,7 +329,7 @@ static int cpufreq_governor_start(struct cpufreq_policy *policy,
 {
 	struct common_dbs_data *cdata = dbs_data->cdata;
 	unsigned int sampling_rate, ignore_nice, j, cpu = policy->cpu;
-	struct cpu_dbs_info *cpu_cdbs = cdata->get_cpu_cdbs(cpu);
+	struct cpu_dbs_info *cdbs = cdata->get_cpu_cdbs(cpu);
 	int io_busy = 0;
 
 	if (!policy->cur)
@@ -385,7 +385,7 @@ static int cpufreq_governor_start(struct cpufreq_policy *policy,
 	}
 
 	/* Initiate timer time stamp */
-	cpu_cdbs->time_stamp = ktime_get();
+	cdbs->time_stamp = ktime_get();
 
 	gov_queue_work(dbs_data, policy, delay_for_sampling_rate(sampling_rate),
 		       true);
@@ -397,7 +397,7 @@ static void cpufreq_governor_stop(struct cpufreq_policy *policy,
 {
 	struct common_dbs_data *cdata = dbs_data->cdata;
 	unsigned int cpu = policy->cpu;
-	struct cpu_dbs_info *cpu_cdbs = cdata->get_cpu_cdbs(cpu);
+	struct cpu_dbs_info *cdbs = cdata->get_cpu_cdbs(cpu);
 
 	if (cdata->governor == GOV_CONSERVATIVE) {
 		struct cs_cpu_dbs_info_s *cs_dbs_info =
@@ -408,8 +408,8 @@ static void cpufreq_governor_stop(struct cpufreq_policy *policy,
 
 	gov_cancel_work(dbs_data, policy);
 
-	mutex_destroy(&cpu_cdbs->timer_mutex);
-	cpu_cdbs->cur_policy = NULL;
+	mutex_destroy(&cdbs->timer_mutex);
+	cdbs->cur_policy = NULL;
 }
 
 static void cpufreq_governor_limits(struct cpufreq_policy *policy,
@@ -417,20 +417,20 @@ static void cpufreq_governor_limits(struct cpufreq_policy *policy,
 {
 	struct common_dbs_data *cdata = dbs_data->cdata;
 	unsigned int cpu = policy->cpu;
-	struct cpu_dbs_info *cpu_cdbs = cdata->get_cpu_cdbs(cpu);
+	struct cpu_dbs_info *cdbs = cdata->get_cpu_cdbs(cpu);
 
-	if (!cpu_cdbs->cur_policy)
+	if (!cdbs->cur_policy)
 		return;
 
-	mutex_lock(&cpu_cdbs->timer_mutex);
-	if (policy->max < cpu_cdbs->cur_policy->cur)
-		__cpufreq_driver_target(cpu_cdbs->cur_policy, policy->max,
+	mutex_lock(&cdbs->timer_mutex);
+	if (policy->max < cdbs->cur_policy->cur)
+		__cpufreq_driver_target(cdbs->cur_policy, policy->max,
 					CPUFREQ_RELATION_H);
-	else if (policy->min > cpu_cdbs->cur_policy->cur)
-		__cpufreq_driver_target(cpu_cdbs->cur_policy, policy->min,
+	else if (policy->min > cdbs->cur_policy->cur)
+		__cpufreq_driver_target(cdbs->cur_policy, policy->min,
 					CPUFREQ_RELATION_L);
 	dbs_check_cpu(dbs_data, cpu);
-	mutex_unlock(&cpu_cdbs->timer_mutex);
+	mutex_unlock(&cdbs->timer_mutex);
 }
 
 int cpufreq_governor_dbs(struct cpufreq_policy *policy,

From 42994af63cd1aafc9289035cf621e501b08732e9 Mon Sep 17 00:00:00 2001
From: Viresh Kumar <viresh.kumar@linaro.org>
Date: Fri, 19 Jun 2015 17:18:05 +0530
Subject: [PATCH 11/53] cpufreq: governor: rename cur_policy as policy

Just call it 'policy', cur_policy is unnecessarily long and doesn't
have any special meaning.

Reviewed-by: Preeti U Murthy <preeti@linux.vnet.ibm.com>
Signed-off-by: Viresh Kumar <viresh.kumar@linaro.org>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 drivers/cpufreq/cpufreq_conservative.c | 10 +++++-----
 drivers/cpufreq/cpufreq_governor.c     | 18 +++++++++---------
 drivers/cpufreq/cpufreq_governor.h     |  2 +-
 drivers/cpufreq/cpufreq_ondemand.c     | 19 ++++++++++---------
 4 files changed, 25 insertions(+), 24 deletions(-)

diff --git a/drivers/cpufreq/cpufreq_conservative.c b/drivers/cpufreq/cpufreq_conservative.c
index 0e3ec1d968d9..af47d322679e 100644
--- a/drivers/cpufreq/cpufreq_conservative.c
+++ b/drivers/cpufreq/cpufreq_conservative.c
@@ -47,7 +47,7 @@ static inline unsigned int get_freq_target(struct cs_dbs_tuners *cs_tuners,
 static void cs_check_cpu(int cpu, unsigned int load)
 {
 	struct cs_cpu_dbs_info_s *dbs_info = &per_cpu(cs_cpu_dbs_info, cpu);
-	struct cpufreq_policy *policy = dbs_info->cdbs.cur_policy;
+	struct cpufreq_policy *policy = dbs_info->cdbs.policy;
 	struct dbs_data *dbs_data = policy->governor_data;
 	struct cs_dbs_tuners *cs_tuners = dbs_data->tuners;
 
@@ -106,10 +106,10 @@ static void cs_dbs_timer(struct work_struct *work)
 {
 	struct cs_cpu_dbs_info_s *dbs_info = container_of(work,
 			struct cs_cpu_dbs_info_s, cdbs.dwork.work);
-	unsigned int cpu = dbs_info->cdbs.cur_policy->cpu;
+	unsigned int cpu = dbs_info->cdbs.policy->cpu;
 	struct cs_cpu_dbs_info_s *core_dbs_info = &per_cpu(cs_cpu_dbs_info,
 			cpu);
-	struct dbs_data *dbs_data = dbs_info->cdbs.cur_policy->governor_data;
+	struct dbs_data *dbs_data = dbs_info->cdbs.policy->governor_data;
 	struct cs_dbs_tuners *cs_tuners = dbs_data->tuners;
 	int delay = delay_for_sampling_rate(cs_tuners->sampling_rate);
 	bool modify_all = true;
@@ -120,7 +120,7 @@ static void cs_dbs_timer(struct work_struct *work)
 	else
 		dbs_check_cpu(dbs_data, cpu);
 
-	gov_queue_work(dbs_data, dbs_info->cdbs.cur_policy, delay, modify_all);
+	gov_queue_work(dbs_data, dbs_info->cdbs.policy, delay, modify_all);
 	mutex_unlock(&core_dbs_info->cdbs.timer_mutex);
 }
 
@@ -135,7 +135,7 @@ static int dbs_cpufreq_notifier(struct notifier_block *nb, unsigned long val,
 	if (!dbs_info->enable)
 		return 0;
 
-	policy = dbs_info->cdbs.cur_policy;
+	policy = dbs_info->cdbs.policy;
 
 	/*
 	 * we only care if our internally tracked freq moves outside the 'valid'
diff --git a/drivers/cpufreq/cpufreq_governor.c b/drivers/cpufreq/cpufreq_governor.c
index 4ea13f182154..c0566f86caed 100644
--- a/drivers/cpufreq/cpufreq_governor.c
+++ b/drivers/cpufreq/cpufreq_governor.c
@@ -60,7 +60,7 @@ void dbs_check_cpu(struct dbs_data *dbs_data, int cpu)
 		ignore_nice = cs_tuners->ignore_nice_load;
 	}
 
-	policy = cdbs->cur_policy;
+	policy = cdbs->policy;
 
 	/* Get Absolute Load */
 	for_each_cpu(j, policy->cpus) {
@@ -211,7 +211,7 @@ static inline void gov_cancel_work(struct dbs_data *dbs_data,
 /* Will return if we need to evaluate cpu load again or not */
 bool need_load_eval(struct cpu_dbs_info *cdbs, unsigned int sampling_rate)
 {
-	if (policy_is_shared(cdbs->cur_policy)) {
+	if (policy_is_shared(cdbs->policy)) {
 		ktime_t time_now = ktime_get();
 		s64 delta_us = ktime_us_delta(time_now, cdbs->time_stamp);
 
@@ -352,7 +352,7 @@ static int cpufreq_governor_start(struct cpufreq_policy *policy,
 		struct cpu_dbs_info *j_cdbs = cdata->get_cpu_cdbs(j);
 		unsigned int prev_load;
 
-		j_cdbs->cur_policy = policy;
+		j_cdbs->policy = policy;
 		j_cdbs->prev_cpu_idle =
 			get_cpu_idle_time(j, &j_cdbs->prev_cpu_wall, io_busy);
 
@@ -409,7 +409,7 @@ static void cpufreq_governor_stop(struct cpufreq_policy *policy,
 	gov_cancel_work(dbs_data, policy);
 
 	mutex_destroy(&cdbs->timer_mutex);
-	cdbs->cur_policy = NULL;
+	cdbs->policy = NULL;
 }
 
 static void cpufreq_governor_limits(struct cpufreq_policy *policy,
@@ -419,15 +419,15 @@ static void cpufreq_governor_limits(struct cpufreq_policy *policy,
 	unsigned int cpu = policy->cpu;
 	struct cpu_dbs_info *cdbs = cdata->get_cpu_cdbs(cpu);
 
-	if (!cdbs->cur_policy)
+	if (!cdbs->policy)
 		return;
 
 	mutex_lock(&cdbs->timer_mutex);
-	if (policy->max < cdbs->cur_policy->cur)
-		__cpufreq_driver_target(cdbs->cur_policy, policy->max,
+	if (policy->max < cdbs->policy->cur)
+		__cpufreq_driver_target(cdbs->policy, policy->max,
 					CPUFREQ_RELATION_H);
-	else if (policy->min > cdbs->cur_policy->cur)
-		__cpufreq_driver_target(cdbs->cur_policy, policy->min,
+	else if (policy->min > cdbs->policy->cur)
+		__cpufreq_driver_target(cdbs->policy, policy->min,
 					CPUFREQ_RELATION_L);
 	dbs_check_cpu(dbs_data, cpu);
 	mutex_unlock(&cdbs->timer_mutex);
diff --git a/drivers/cpufreq/cpufreq_governor.h b/drivers/cpufreq/cpufreq_governor.h
index 6b5e33f68064..a0f8eb79ee6d 100644
--- a/drivers/cpufreq/cpufreq_governor.h
+++ b/drivers/cpufreq/cpufreq_governor.h
@@ -140,7 +140,7 @@ struct cpu_dbs_info {
 	 * wake-up from idle.
 	 */
 	unsigned int prev_load;
-	struct cpufreq_policy *cur_policy;
+	struct cpufreq_policy *policy;
 	struct delayed_work dwork;
 	/*
 	 * percpu mutex that serializes governor limit change with gov_dbs_timer
diff --git a/drivers/cpufreq/cpufreq_ondemand.c b/drivers/cpufreq/cpufreq_ondemand.c
index 830aef1db8c3..d29c6f9c6e3e 100644
--- a/drivers/cpufreq/cpufreq_ondemand.c
+++ b/drivers/cpufreq/cpufreq_ondemand.c
@@ -155,7 +155,7 @@ static void dbs_freq_increase(struct cpufreq_policy *policy, unsigned int freq)
 static void od_check_cpu(int cpu, unsigned int load)
 {
 	struct od_cpu_dbs_info_s *dbs_info = &per_cpu(od_cpu_dbs_info, cpu);
-	struct cpufreq_policy *policy = dbs_info->cdbs.cur_policy;
+	struct cpufreq_policy *policy = dbs_info->cdbs.policy;
 	struct dbs_data *dbs_data = policy->governor_data;
 	struct od_dbs_tuners *od_tuners = dbs_data->tuners;
 
@@ -195,10 +195,10 @@ static void od_dbs_timer(struct work_struct *work)
 {
 	struct od_cpu_dbs_info_s *dbs_info =
 		container_of(work, struct od_cpu_dbs_info_s, cdbs.dwork.work);
-	unsigned int cpu = dbs_info->cdbs.cur_policy->cpu;
+	unsigned int cpu = dbs_info->cdbs.policy->cpu;
 	struct od_cpu_dbs_info_s *core_dbs_info = &per_cpu(od_cpu_dbs_info,
 			cpu);
-	struct dbs_data *dbs_data = dbs_info->cdbs.cur_policy->governor_data;
+	struct dbs_data *dbs_data = dbs_info->cdbs.policy->governor_data;
 	struct od_dbs_tuners *od_tuners = dbs_data->tuners;
 	int delay = 0, sample_type = core_dbs_info->sample_type;
 	bool modify_all = true;
@@ -213,8 +213,9 @@ static void od_dbs_timer(struct work_struct *work)
 	core_dbs_info->sample_type = OD_NORMAL_SAMPLE;
 	if (sample_type == OD_SUB_SAMPLE) {
 		delay = core_dbs_info->freq_lo_jiffies;
-		__cpufreq_driver_target(core_dbs_info->cdbs.cur_policy,
-				core_dbs_info->freq_lo, CPUFREQ_RELATION_H);
+		__cpufreq_driver_target(core_dbs_info->cdbs.policy,
+					core_dbs_info->freq_lo,
+					CPUFREQ_RELATION_H);
 	} else {
 		dbs_check_cpu(dbs_data, cpu);
 		if (core_dbs_info->freq_lo) {
@@ -229,7 +230,7 @@ max_delay:
 		delay = delay_for_sampling_rate(od_tuners->sampling_rate
 				* core_dbs_info->rate_mult);
 
-	gov_queue_work(dbs_data, dbs_info->cdbs.cur_policy, delay, modify_all);
+	gov_queue_work(dbs_data, dbs_info->cdbs.policy, delay, modify_all);
 	mutex_unlock(&core_dbs_info->cdbs.timer_mutex);
 }
 
@@ -289,8 +290,8 @@ static void update_sampling_rate(struct dbs_data *dbs_data,
 			cancel_delayed_work_sync(&dbs_info->cdbs.dwork);
 			mutex_lock(&dbs_info->cdbs.timer_mutex);
 
-			gov_queue_work(dbs_data, dbs_info->cdbs.cur_policy,
-					usecs_to_jiffies(new_rate), true);
+			gov_queue_work(dbs_data, dbs_info->cdbs.policy,
+				       usecs_to_jiffies(new_rate), true);
 
 		}
 		mutex_unlock(&dbs_info->cdbs.timer_mutex);
@@ -559,7 +560,7 @@ static void od_set_powersave_bias(unsigned int powersave_bias)
 		if (cpumask_test_cpu(cpu, &done))
 			continue;
 
-		policy = per_cpu(od_cpu_dbs_info, cpu).cdbs.cur_policy;
+		policy = per_cpu(od_cpu_dbs_info, cpu).cdbs.policy;
 		if (!policy)
 			continue;
 

From 44152cb82d1ad6ae6f8b47c5437f6f1e65ca82c4 Mon Sep 17 00:00:00 2001
From: Viresh Kumar <viresh.kumar@linaro.org>
Date: Sat, 18 Jul 2015 11:30:59 +0530
Subject: [PATCH 12/53] cpufreq: governor: Keep single copy of information
 common to policy->cpus

Some information is common to all CPUs belonging to a policy, but are
kept on per-cpu basis. Lets keep that in another structure common to all
policy->cpus. That will make updates/reads to that less complex and less
error prone.

The memory for cpu_common_dbs_info is allocated/freed at INIT/EXIT, so
that it we don't reallocate it for STOP/START sequence. It will be also
be used (in next patch) while the governor is stopped and so must not be
freed that early.

Reviewed-and-tested-by: Preeti U Murthy <preeti@linux.vnet.ibm.com>
Signed-off-by: Viresh Kumar <viresh.kumar@linaro.org>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 drivers/cpufreq/cpufreq_conservative.c | 18 ++---
 drivers/cpufreq/cpufreq_governor.c     | 92 +++++++++++++++++++-------
 drivers/cpufreq/cpufreq_governor.h     | 24 ++++---
 drivers/cpufreq/cpufreq_ondemand.c     | 38 ++++++-----
 4 files changed, 114 insertions(+), 58 deletions(-)

diff --git a/drivers/cpufreq/cpufreq_conservative.c b/drivers/cpufreq/cpufreq_conservative.c
index af47d322679e..d21c3cff9056 100644
--- a/drivers/cpufreq/cpufreq_conservative.c
+++ b/drivers/cpufreq/cpufreq_conservative.c
@@ -47,7 +47,7 @@ static inline unsigned int get_freq_target(struct cs_dbs_tuners *cs_tuners,
 static void cs_check_cpu(int cpu, unsigned int load)
 {
 	struct cs_cpu_dbs_info_s *dbs_info = &per_cpu(cs_cpu_dbs_info, cpu);
-	struct cpufreq_policy *policy = dbs_info->cdbs.policy;
+	struct cpufreq_policy *policy = dbs_info->cdbs.shared->policy;
 	struct dbs_data *dbs_data = policy->governor_data;
 	struct cs_dbs_tuners *cs_tuners = dbs_data->tuners;
 
@@ -106,22 +106,24 @@ static void cs_dbs_timer(struct work_struct *work)
 {
 	struct cs_cpu_dbs_info_s *dbs_info = container_of(work,
 			struct cs_cpu_dbs_info_s, cdbs.dwork.work);
-	unsigned int cpu = dbs_info->cdbs.policy->cpu;
+	struct cpufreq_policy *policy = dbs_info->cdbs.shared->policy;
+	unsigned int cpu = policy->cpu;
 	struct cs_cpu_dbs_info_s *core_dbs_info = &per_cpu(cs_cpu_dbs_info,
 			cpu);
-	struct dbs_data *dbs_data = dbs_info->cdbs.policy->governor_data;
+	struct dbs_data *dbs_data = policy->governor_data;
 	struct cs_dbs_tuners *cs_tuners = dbs_data->tuners;
 	int delay = delay_for_sampling_rate(cs_tuners->sampling_rate);
 	bool modify_all = true;
 
-	mutex_lock(&core_dbs_info->cdbs.timer_mutex);
-	if (!need_load_eval(&core_dbs_info->cdbs, cs_tuners->sampling_rate))
+	mutex_lock(&core_dbs_info->cdbs.shared->timer_mutex);
+	if (!need_load_eval(core_dbs_info->cdbs.shared,
+			    cs_tuners->sampling_rate))
 		modify_all = false;
 	else
 		dbs_check_cpu(dbs_data, cpu);
 
-	gov_queue_work(dbs_data, dbs_info->cdbs.policy, delay, modify_all);
-	mutex_unlock(&core_dbs_info->cdbs.timer_mutex);
+	gov_queue_work(dbs_data, policy, delay, modify_all);
+	mutex_unlock(&core_dbs_info->cdbs.shared->timer_mutex);
 }
 
 static int dbs_cpufreq_notifier(struct notifier_block *nb, unsigned long val,
@@ -135,7 +137,7 @@ static int dbs_cpufreq_notifier(struct notifier_block *nb, unsigned long val,
 	if (!dbs_info->enable)
 		return 0;
 
-	policy = dbs_info->cdbs.policy;
+	policy = dbs_info->cdbs.shared->policy;
 
 	/*
 	 * we only care if our internally tracked freq moves outside the 'valid'
diff --git a/drivers/cpufreq/cpufreq_governor.c b/drivers/cpufreq/cpufreq_governor.c
index c0566f86caed..b01cb729104b 100644
--- a/drivers/cpufreq/cpufreq_governor.c
+++ b/drivers/cpufreq/cpufreq_governor.c
@@ -35,7 +35,7 @@ void dbs_check_cpu(struct dbs_data *dbs_data, int cpu)
 	struct cpu_dbs_info *cdbs = dbs_data->cdata->get_cpu_cdbs(cpu);
 	struct od_dbs_tuners *od_tuners = dbs_data->tuners;
 	struct cs_dbs_tuners *cs_tuners = dbs_data->tuners;
-	struct cpufreq_policy *policy;
+	struct cpufreq_policy *policy = cdbs->shared->policy;
 	unsigned int sampling_rate;
 	unsigned int max_load = 0;
 	unsigned int ignore_nice;
@@ -60,8 +60,6 @@ void dbs_check_cpu(struct dbs_data *dbs_data, int cpu)
 		ignore_nice = cs_tuners->ignore_nice_load;
 	}
 
-	policy = cdbs->policy;
-
 	/* Get Absolute Load */
 	for_each_cpu(j, policy->cpus) {
 		struct cpu_dbs_info *j_cdbs;
@@ -209,17 +207,18 @@ static inline void gov_cancel_work(struct dbs_data *dbs_data,
 }
 
 /* Will return if we need to evaluate cpu load again or not */
-bool need_load_eval(struct cpu_dbs_info *cdbs, unsigned int sampling_rate)
+bool need_load_eval(struct cpu_common_dbs_info *shared,
+		    unsigned int sampling_rate)
 {
-	if (policy_is_shared(cdbs->policy)) {
+	if (policy_is_shared(shared->policy)) {
 		ktime_t time_now = ktime_get();
-		s64 delta_us = ktime_us_delta(time_now, cdbs->time_stamp);
+		s64 delta_us = ktime_us_delta(time_now, shared->time_stamp);
 
 		/* Do nothing if we recently have sampled */
 		if (delta_us < (s64)(sampling_rate / 2))
 			return false;
 		else
-			cdbs->time_stamp = time_now;
+			shared->time_stamp = time_now;
 	}
 
 	return true;
@@ -238,6 +237,37 @@ static void set_sampling_rate(struct dbs_data *dbs_data,
 	}
 }
 
+static int alloc_common_dbs_info(struct cpufreq_policy *policy,
+				 struct common_dbs_data *cdata)
+{
+	struct cpu_common_dbs_info *shared;
+	int j;
+
+	/* Allocate memory for the common information for policy->cpus */
+	shared = kzalloc(sizeof(*shared), GFP_KERNEL);
+	if (!shared)
+		return -ENOMEM;
+
+	/* Set shared for all CPUs, online+offline */
+	for_each_cpu(j, policy->related_cpus)
+		cdata->get_cpu_cdbs(j)->shared = shared;
+
+	return 0;
+}
+
+static void free_common_dbs_info(struct cpufreq_policy *policy,
+				 struct common_dbs_data *cdata)
+{
+	struct cpu_dbs_info *cdbs = cdata->get_cpu_cdbs(policy->cpu);
+	struct cpu_common_dbs_info *shared = cdbs->shared;
+	int j;
+
+	for_each_cpu(j, policy->cpus)
+		cdata->get_cpu_cdbs(j)->shared = NULL;
+
+	kfree(shared);
+}
+
 static int cpufreq_governor_init(struct cpufreq_policy *policy,
 				 struct dbs_data *dbs_data,
 				 struct common_dbs_data *cdata)
@@ -248,6 +278,11 @@ static int cpufreq_governor_init(struct cpufreq_policy *policy,
 	if (dbs_data) {
 		if (WARN_ON(have_governor_per_policy()))
 			return -EINVAL;
+
+		ret = alloc_common_dbs_info(policy, cdata);
+		if (ret)
+			return ret;
+
 		dbs_data->usage_count++;
 		policy->governor_data = dbs_data;
 		return 0;
@@ -257,12 +292,16 @@ static int cpufreq_governor_init(struct cpufreq_policy *policy,
 	if (!dbs_data)
 		return -ENOMEM;
 
+	ret = alloc_common_dbs_info(policy, cdata);
+	if (ret)
+		goto free_dbs_data;
+
 	dbs_data->cdata = cdata;
 	dbs_data->usage_count = 1;
 
 	ret = cdata->init(dbs_data, !policy->governor->initialized);
 	if (ret)
-		goto free_dbs_data;
+		goto free_common_dbs_info;
 
 	/* policy latency is in ns. Convert it to us first */
 	latency = policy->cpuinfo.transition_latency / 1000;
@@ -299,6 +338,8 @@ put_kobj:
 	}
 cdata_exit:
 	cdata->exit(dbs_data, !policy->governor->initialized);
+free_common_dbs_info:
+	free_common_dbs_info(policy, cdata);
 free_dbs_data:
 	kfree(dbs_data);
 	return ret;
@@ -322,6 +363,8 @@ static void cpufreq_governor_exit(struct cpufreq_policy *policy,
 		cdata->exit(dbs_data, policy->governor->initialized == 1);
 		kfree(dbs_data);
 	}
+
+	free_common_dbs_info(policy, cdata);
 }
 
 static int cpufreq_governor_start(struct cpufreq_policy *policy,
@@ -330,6 +373,7 @@ static int cpufreq_governor_start(struct cpufreq_policy *policy,
 	struct common_dbs_data *cdata = dbs_data->cdata;
 	unsigned int sampling_rate, ignore_nice, j, cpu = policy->cpu;
 	struct cpu_dbs_info *cdbs = cdata->get_cpu_cdbs(cpu);
+	struct cpu_common_dbs_info *shared = cdbs->shared;
 	int io_busy = 0;
 
 	if (!policy->cur)
@@ -348,11 +392,14 @@ static int cpufreq_governor_start(struct cpufreq_policy *policy,
 		io_busy = od_tuners->io_is_busy;
 	}
 
+	shared->policy = policy;
+	shared->time_stamp = ktime_get();
+	mutex_init(&shared->timer_mutex);
+
 	for_each_cpu(j, policy->cpus) {
 		struct cpu_dbs_info *j_cdbs = cdata->get_cpu_cdbs(j);
 		unsigned int prev_load;
 
-		j_cdbs->policy = policy;
 		j_cdbs->prev_cpu_idle =
 			get_cpu_idle_time(j, &j_cdbs->prev_cpu_wall, io_busy);
 
@@ -364,7 +411,6 @@ static int cpufreq_governor_start(struct cpufreq_policy *policy,
 		if (ignore_nice)
 			j_cdbs->prev_cpu_nice = kcpustat_cpu(j).cpustat[CPUTIME_NICE];
 
-		mutex_init(&j_cdbs->timer_mutex);
 		INIT_DEFERRABLE_WORK(&j_cdbs->dwork, cdata->gov_dbs_timer);
 	}
 
@@ -384,9 +430,6 @@ static int cpufreq_governor_start(struct cpufreq_policy *policy,
 		od_ops->powersave_bias_init_cpu(cpu);
 	}
 
-	/* Initiate timer time stamp */
-	cdbs->time_stamp = ktime_get();
-
 	gov_queue_work(dbs_data, policy, delay_for_sampling_rate(sampling_rate),
 		       true);
 	return 0;
@@ -398,6 +441,9 @@ static void cpufreq_governor_stop(struct cpufreq_policy *policy,
 	struct common_dbs_data *cdata = dbs_data->cdata;
 	unsigned int cpu = policy->cpu;
 	struct cpu_dbs_info *cdbs = cdata->get_cpu_cdbs(cpu);
+	struct cpu_common_dbs_info *shared = cdbs->shared;
+
+	gov_cancel_work(dbs_data, policy);
 
 	if (cdata->governor == GOV_CONSERVATIVE) {
 		struct cs_cpu_dbs_info_s *cs_dbs_info =
@@ -406,10 +452,8 @@ static void cpufreq_governor_stop(struct cpufreq_policy *policy,
 		cs_dbs_info->enable = 0;
 	}
 
-	gov_cancel_work(dbs_data, policy);
-
-	mutex_destroy(&cdbs->timer_mutex);
-	cdbs->policy = NULL;
+	shared->policy = NULL;
+	mutex_destroy(&shared->timer_mutex);
 }
 
 static void cpufreq_governor_limits(struct cpufreq_policy *policy,
@@ -419,18 +463,18 @@ static void cpufreq_governor_limits(struct cpufreq_policy *policy,
 	unsigned int cpu = policy->cpu;
 	struct cpu_dbs_info *cdbs = cdata->get_cpu_cdbs(cpu);
 
-	if (!cdbs->policy)
+	if (!cdbs->shared || !cdbs->shared->policy)
 		return;
 
-	mutex_lock(&cdbs->timer_mutex);
-	if (policy->max < cdbs->policy->cur)
-		__cpufreq_driver_target(cdbs->policy, policy->max,
+	mutex_lock(&cdbs->shared->timer_mutex);
+	if (policy->max < cdbs->shared->policy->cur)
+		__cpufreq_driver_target(cdbs->shared->policy, policy->max,
 					CPUFREQ_RELATION_H);
-	else if (policy->min > cdbs->policy->cur)
-		__cpufreq_driver_target(cdbs->policy, policy->min,
+	else if (policy->min > cdbs->shared->policy->cur)
+		__cpufreq_driver_target(cdbs->shared->policy, policy->min,
 					CPUFREQ_RELATION_L);
 	dbs_check_cpu(dbs_data, cpu);
-	mutex_unlock(&cdbs->timer_mutex);
+	mutex_unlock(&cdbs->shared->timer_mutex);
 }
 
 int cpufreq_governor_dbs(struct cpufreq_policy *policy,
diff --git a/drivers/cpufreq/cpufreq_governor.h b/drivers/cpufreq/cpufreq_governor.h
index a0f8eb79ee6d..8e4a25f0730c 100644
--- a/drivers/cpufreq/cpufreq_governor.h
+++ b/drivers/cpufreq/cpufreq_governor.h
@@ -128,6 +128,18 @@ static void *get_cpu_dbs_info_s(int cpu)				\
  * cs_*: Conservative governor
  */
 
+/* Common to all CPUs of a policy */
+struct cpu_common_dbs_info {
+	struct cpufreq_policy *policy;
+	/*
+	 * percpu mutex that serializes governor limit change with gov_dbs_timer
+	 * invocation. We do not want gov_dbs_timer to run when user is changing
+	 * the governor or limits.
+	 */
+	struct mutex timer_mutex;
+	ktime_t time_stamp;
+};
+
 /* Per cpu structures */
 struct cpu_dbs_info {
 	u64 prev_cpu_idle;
@@ -140,15 +152,8 @@ struct cpu_dbs_info {
 	 * wake-up from idle.
 	 */
 	unsigned int prev_load;
-	struct cpufreq_policy *policy;
 	struct delayed_work dwork;
-	/*
-	 * percpu mutex that serializes governor limit change with gov_dbs_timer
-	 * invocation. We do not want gov_dbs_timer to run when user is changing
-	 * the governor or limits.
-	 */
-	struct mutex timer_mutex;
-	ktime_t time_stamp;
+	struct cpu_common_dbs_info *shared;
 };
 
 struct od_cpu_dbs_info_s {
@@ -264,7 +269,8 @@ static ssize_t show_sampling_rate_min_gov_pol				\
 extern struct mutex cpufreq_governor_lock;
 
 void dbs_check_cpu(struct dbs_data *dbs_data, int cpu);
-bool need_load_eval(struct cpu_dbs_info *cdbs, unsigned int sampling_rate);
+bool need_load_eval(struct cpu_common_dbs_info *shared,
+		    unsigned int sampling_rate);
 int cpufreq_governor_dbs(struct cpufreq_policy *policy,
 		struct common_dbs_data *cdata, unsigned int event);
 void gov_queue_work(struct dbs_data *dbs_data, struct cpufreq_policy *policy,
diff --git a/drivers/cpufreq/cpufreq_ondemand.c b/drivers/cpufreq/cpufreq_ondemand.c
index d29c6f9c6e3e..14d7e86e7f94 100644
--- a/drivers/cpufreq/cpufreq_ondemand.c
+++ b/drivers/cpufreq/cpufreq_ondemand.c
@@ -155,7 +155,7 @@ static void dbs_freq_increase(struct cpufreq_policy *policy, unsigned int freq)
 static void od_check_cpu(int cpu, unsigned int load)
 {
 	struct od_cpu_dbs_info_s *dbs_info = &per_cpu(od_cpu_dbs_info, cpu);
-	struct cpufreq_policy *policy = dbs_info->cdbs.policy;
+	struct cpufreq_policy *policy = dbs_info->cdbs.shared->policy;
 	struct dbs_data *dbs_data = policy->governor_data;
 	struct od_dbs_tuners *od_tuners = dbs_data->tuners;
 
@@ -195,16 +195,18 @@ static void od_dbs_timer(struct work_struct *work)
 {
 	struct od_cpu_dbs_info_s *dbs_info =
 		container_of(work, struct od_cpu_dbs_info_s, cdbs.dwork.work);
-	unsigned int cpu = dbs_info->cdbs.policy->cpu;
+	struct cpufreq_policy *policy = dbs_info->cdbs.shared->policy;
+	unsigned int cpu = policy->cpu;
 	struct od_cpu_dbs_info_s *core_dbs_info = &per_cpu(od_cpu_dbs_info,
 			cpu);
-	struct dbs_data *dbs_data = dbs_info->cdbs.policy->governor_data;
+	struct dbs_data *dbs_data = policy->governor_data;
 	struct od_dbs_tuners *od_tuners = dbs_data->tuners;
 	int delay = 0, sample_type = core_dbs_info->sample_type;
 	bool modify_all = true;
 
-	mutex_lock(&core_dbs_info->cdbs.timer_mutex);
-	if (!need_load_eval(&core_dbs_info->cdbs, od_tuners->sampling_rate)) {
+	mutex_lock(&core_dbs_info->cdbs.shared->timer_mutex);
+	if (!need_load_eval(core_dbs_info->cdbs.shared,
+			    od_tuners->sampling_rate)) {
 		modify_all = false;
 		goto max_delay;
 	}
@@ -213,8 +215,7 @@ static void od_dbs_timer(struct work_struct *work)
 	core_dbs_info->sample_type = OD_NORMAL_SAMPLE;
 	if (sample_type == OD_SUB_SAMPLE) {
 		delay = core_dbs_info->freq_lo_jiffies;
-		__cpufreq_driver_target(core_dbs_info->cdbs.policy,
-					core_dbs_info->freq_lo,
+		__cpufreq_driver_target(policy, core_dbs_info->freq_lo,
 					CPUFREQ_RELATION_H);
 	} else {
 		dbs_check_cpu(dbs_data, cpu);
@@ -230,8 +231,8 @@ max_delay:
 		delay = delay_for_sampling_rate(od_tuners->sampling_rate
 				* core_dbs_info->rate_mult);
 
-	gov_queue_work(dbs_data, dbs_info->cdbs.policy, delay, modify_all);
-	mutex_unlock(&core_dbs_info->cdbs.timer_mutex);
+	gov_queue_work(dbs_data, policy, delay, modify_all);
+	mutex_unlock(&core_dbs_info->cdbs.shared->timer_mutex);
 }
 
 /************************** sysfs interface ************************/
@@ -274,10 +275,10 @@ static void update_sampling_rate(struct dbs_data *dbs_data,
 		dbs_info = &per_cpu(od_cpu_dbs_info, cpu);
 		cpufreq_cpu_put(policy);
 
-		mutex_lock(&dbs_info->cdbs.timer_mutex);
+		mutex_lock(&dbs_info->cdbs.shared->timer_mutex);
 
 		if (!delayed_work_pending(&dbs_info->cdbs.dwork)) {
-			mutex_unlock(&dbs_info->cdbs.timer_mutex);
+			mutex_unlock(&dbs_info->cdbs.shared->timer_mutex);
 			continue;
 		}
 
@@ -286,15 +287,15 @@ static void update_sampling_rate(struct dbs_data *dbs_data,
 
 		if (time_before(next_sampling, appointed_at)) {
 
-			mutex_unlock(&dbs_info->cdbs.timer_mutex);
+			mutex_unlock(&dbs_info->cdbs.shared->timer_mutex);
 			cancel_delayed_work_sync(&dbs_info->cdbs.dwork);
-			mutex_lock(&dbs_info->cdbs.timer_mutex);
+			mutex_lock(&dbs_info->cdbs.shared->timer_mutex);
 
-			gov_queue_work(dbs_data, dbs_info->cdbs.policy,
+			gov_queue_work(dbs_data, policy,
 				       usecs_to_jiffies(new_rate), true);
 
 		}
-		mutex_unlock(&dbs_info->cdbs.timer_mutex);
+		mutex_unlock(&dbs_info->cdbs.shared->timer_mutex);
 	}
 }
 
@@ -557,13 +558,16 @@ static void od_set_powersave_bias(unsigned int powersave_bias)
 
 	get_online_cpus();
 	for_each_online_cpu(cpu) {
+		struct cpu_common_dbs_info *shared;
+
 		if (cpumask_test_cpu(cpu, &done))
 			continue;
 
-		policy = per_cpu(od_cpu_dbs_info, cpu).cdbs.policy;
-		if (!policy)
+		shared = per_cpu(od_cpu_dbs_info, cpu).cdbs.shared;
+		if (!shared)
 			continue;
 
+		policy = shared->policy;
 		cpumask_or(&done, &done, policy->cpus);
 
 		if (policy->governor != &cpufreq_gov_ondemand)

From 43e0ee361e96229959c2ce1eda1ad9d6b3c191b2 Mon Sep 17 00:00:00 2001
From: Viresh Kumar <viresh.kumar@linaro.org>
Date: Sat, 18 Jul 2015 11:31:00 +0530
Subject: [PATCH 13/53] cpufreq: governor: split out common part of
 {cs|od}_dbs_timer()

Some part of cs_dbs_timer() and od_dbs_timer() is exactly same and is
unnecessarily duplicated.

Create the real work-handler in cpufreq_governor.c and put the common
code in this routine (dbs_timer()).

Shouldn't make any functional change.

Reviewed-and-tested-by: Preeti U Murthy <preeti@linux.vnet.ibm.com>
Signed-off-by: Viresh Kumar <viresh.kumar@linaro.org>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 drivers/cpufreq/cpufreq_conservative.c | 23 ++++------------
 drivers/cpufreq/cpufreq_governor.c     | 38 +++++++++++++++++++++++---
 drivers/cpufreq/cpufreq_governor.h     | 10 +++----
 drivers/cpufreq/cpufreq_ondemand.c     | 36 ++++++++++--------------
 4 files changed, 58 insertions(+), 49 deletions(-)

diff --git a/drivers/cpufreq/cpufreq_conservative.c b/drivers/cpufreq/cpufreq_conservative.c
index d21c3cff9056..84a1506950a7 100644
--- a/drivers/cpufreq/cpufreq_conservative.c
+++ b/drivers/cpufreq/cpufreq_conservative.c
@@ -102,28 +102,15 @@ static void cs_check_cpu(int cpu, unsigned int load)
 	}
 }
 
-static void cs_dbs_timer(struct work_struct *work)
+static unsigned int cs_dbs_timer(struct cpu_dbs_info *cdbs,
+				 struct dbs_data *dbs_data, bool modify_all)
 {
-	struct cs_cpu_dbs_info_s *dbs_info = container_of(work,
-			struct cs_cpu_dbs_info_s, cdbs.dwork.work);
-	struct cpufreq_policy *policy = dbs_info->cdbs.shared->policy;
-	unsigned int cpu = policy->cpu;
-	struct cs_cpu_dbs_info_s *core_dbs_info = &per_cpu(cs_cpu_dbs_info,
-			cpu);
-	struct dbs_data *dbs_data = policy->governor_data;
 	struct cs_dbs_tuners *cs_tuners = dbs_data->tuners;
-	int delay = delay_for_sampling_rate(cs_tuners->sampling_rate);
-	bool modify_all = true;
 
-	mutex_lock(&core_dbs_info->cdbs.shared->timer_mutex);
-	if (!need_load_eval(core_dbs_info->cdbs.shared,
-			    cs_tuners->sampling_rate))
-		modify_all = false;
-	else
-		dbs_check_cpu(dbs_data, cpu);
+	if (modify_all)
+		dbs_check_cpu(dbs_data, cdbs->shared->policy->cpu);
 
-	gov_queue_work(dbs_data, policy, delay, modify_all);
-	mutex_unlock(&core_dbs_info->cdbs.shared->timer_mutex);
+	return delay_for_sampling_rate(cs_tuners->sampling_rate);
 }
 
 static int dbs_cpufreq_notifier(struct notifier_block *nb, unsigned long val,
diff --git a/drivers/cpufreq/cpufreq_governor.c b/drivers/cpufreq/cpufreq_governor.c
index b01cb729104b..7ed0ec2ac853 100644
--- a/drivers/cpufreq/cpufreq_governor.c
+++ b/drivers/cpufreq/cpufreq_governor.c
@@ -207,8 +207,8 @@ static inline void gov_cancel_work(struct dbs_data *dbs_data,
 }
 
 /* Will return if we need to evaluate cpu load again or not */
-bool need_load_eval(struct cpu_common_dbs_info *shared,
-		    unsigned int sampling_rate)
+static bool need_load_eval(struct cpu_common_dbs_info *shared,
+			   unsigned int sampling_rate)
 {
 	if (policy_is_shared(shared->policy)) {
 		ktime_t time_now = ktime_get();
@@ -223,7 +223,37 @@ bool need_load_eval(struct cpu_common_dbs_info *shared,
 
 	return true;
 }
-EXPORT_SYMBOL_GPL(need_load_eval);
+
+static void dbs_timer(struct work_struct *work)
+{
+	struct cpu_dbs_info *cdbs = container_of(work, struct cpu_dbs_info,
+						 dwork.work);
+	struct cpu_common_dbs_info *shared = cdbs->shared;
+	struct cpufreq_policy *policy = shared->policy;
+	struct dbs_data *dbs_data = policy->governor_data;
+	unsigned int sampling_rate, delay;
+	bool modify_all = true;
+
+	mutex_lock(&shared->timer_mutex);
+
+	if (dbs_data->cdata->governor == GOV_CONSERVATIVE) {
+		struct cs_dbs_tuners *cs_tuners = dbs_data->tuners;
+
+		sampling_rate = cs_tuners->sampling_rate;
+	} else {
+		struct od_dbs_tuners *od_tuners = dbs_data->tuners;
+
+		sampling_rate = od_tuners->sampling_rate;
+	}
+
+	if (!need_load_eval(cdbs->shared, sampling_rate))
+		modify_all = false;
+
+	delay = dbs_data->cdata->gov_dbs_timer(cdbs, dbs_data, modify_all);
+	gov_queue_work(dbs_data, policy, delay, modify_all);
+
+	mutex_unlock(&shared->timer_mutex);
+}
 
 static void set_sampling_rate(struct dbs_data *dbs_data,
 		unsigned int sampling_rate)
@@ -411,7 +441,7 @@ static int cpufreq_governor_start(struct cpufreq_policy *policy,
 		if (ignore_nice)
 			j_cdbs->prev_cpu_nice = kcpustat_cpu(j).cpustat[CPUTIME_NICE];
 
-		INIT_DEFERRABLE_WORK(&j_cdbs->dwork, cdata->gov_dbs_timer);
+		INIT_DEFERRABLE_WORK(&j_cdbs->dwork, dbs_timer);
 	}
 
 	if (cdata->governor == GOV_CONSERVATIVE) {
diff --git a/drivers/cpufreq/cpufreq_governor.h b/drivers/cpufreq/cpufreq_governor.h
index 8e4a25f0730c..50f171796632 100644
--- a/drivers/cpufreq/cpufreq_governor.h
+++ b/drivers/cpufreq/cpufreq_governor.h
@@ -132,8 +132,8 @@ static void *get_cpu_dbs_info_s(int cpu)				\
 struct cpu_common_dbs_info {
 	struct cpufreq_policy *policy;
 	/*
-	 * percpu mutex that serializes governor limit change with gov_dbs_timer
-	 * invocation. We do not want gov_dbs_timer to run when user is changing
+	 * percpu mutex that serializes governor limit change with dbs_timer
+	 * invocation. We do not want dbs_timer to run when user is changing
 	 * the governor or limits.
 	 */
 	struct mutex timer_mutex;
@@ -210,7 +210,9 @@ struct common_dbs_data {
 
 	struct cpu_dbs_info *(*get_cpu_cdbs)(int cpu);
 	void *(*get_cpu_dbs_info_s)(int cpu);
-	void (*gov_dbs_timer)(struct work_struct *work);
+	unsigned int (*gov_dbs_timer)(struct cpu_dbs_info *cdbs,
+				      struct dbs_data *dbs_data,
+				      bool modify_all);
 	void (*gov_check_cpu)(int cpu, unsigned int load);
 	int (*init)(struct dbs_data *dbs_data, bool notify);
 	void (*exit)(struct dbs_data *dbs_data, bool notify);
@@ -269,8 +271,6 @@ static ssize_t show_sampling_rate_min_gov_pol				\
 extern struct mutex cpufreq_governor_lock;
 
 void dbs_check_cpu(struct dbs_data *dbs_data, int cpu);
-bool need_load_eval(struct cpu_common_dbs_info *shared,
-		    unsigned int sampling_rate);
 int cpufreq_governor_dbs(struct cpufreq_policy *policy,
 		struct common_dbs_data *cdata, unsigned int event);
 void gov_queue_work(struct dbs_data *dbs_data, struct cpufreq_policy *policy,
diff --git a/drivers/cpufreq/cpufreq_ondemand.c b/drivers/cpufreq/cpufreq_ondemand.c
index 14d7e86e7f94..1fa9088c84a8 100644
--- a/drivers/cpufreq/cpufreq_ondemand.c
+++ b/drivers/cpufreq/cpufreq_ondemand.c
@@ -191,48 +191,40 @@ static void od_check_cpu(int cpu, unsigned int load)
 	}
 }
 
-static void od_dbs_timer(struct work_struct *work)
+static unsigned int od_dbs_timer(struct cpu_dbs_info *cdbs,
+				 struct dbs_data *dbs_data, bool modify_all)
 {
-	struct od_cpu_dbs_info_s *dbs_info =
-		container_of(work, struct od_cpu_dbs_info_s, cdbs.dwork.work);
-	struct cpufreq_policy *policy = dbs_info->cdbs.shared->policy;
+	struct cpufreq_policy *policy = cdbs->shared->policy;
 	unsigned int cpu = policy->cpu;
-	struct od_cpu_dbs_info_s *core_dbs_info = &per_cpu(od_cpu_dbs_info,
+	struct od_cpu_dbs_info_s *dbs_info = &per_cpu(od_cpu_dbs_info,
 			cpu);
-	struct dbs_data *dbs_data = policy->governor_data;
 	struct od_dbs_tuners *od_tuners = dbs_data->tuners;
-	int delay = 0, sample_type = core_dbs_info->sample_type;
-	bool modify_all = true;
+	int delay = 0, sample_type = dbs_info->sample_type;
 
-	mutex_lock(&core_dbs_info->cdbs.shared->timer_mutex);
-	if (!need_load_eval(core_dbs_info->cdbs.shared,
-			    od_tuners->sampling_rate)) {
-		modify_all = false;
+	if (!modify_all)
 		goto max_delay;
-	}
 
 	/* Common NORMAL_SAMPLE setup */
-	core_dbs_info->sample_type = OD_NORMAL_SAMPLE;
+	dbs_info->sample_type = OD_NORMAL_SAMPLE;
 	if (sample_type == OD_SUB_SAMPLE) {
-		delay = core_dbs_info->freq_lo_jiffies;
-		__cpufreq_driver_target(policy, core_dbs_info->freq_lo,
+		delay = dbs_info->freq_lo_jiffies;
+		__cpufreq_driver_target(policy, dbs_info->freq_lo,
 					CPUFREQ_RELATION_H);
 	} else {
 		dbs_check_cpu(dbs_data, cpu);
-		if (core_dbs_info->freq_lo) {
+		if (dbs_info->freq_lo) {
 			/* Setup timer for SUB_SAMPLE */
-			core_dbs_info->sample_type = OD_SUB_SAMPLE;
-			delay = core_dbs_info->freq_hi_jiffies;
+			dbs_info->sample_type = OD_SUB_SAMPLE;
+			delay = dbs_info->freq_hi_jiffies;
 		}
 	}
 
 max_delay:
 	if (!delay)
 		delay = delay_for_sampling_rate(od_tuners->sampling_rate
-				* core_dbs_info->rate_mult);
+				* dbs_info->rate_mult);
 
-	gov_queue_work(dbs_data, policy, delay, modify_all);
-	mutex_unlock(&core_dbs_info->cdbs.shared->timer_mutex);
+	return delay;
 }
 
 /************************** sysfs interface ************************/

From a72c49590a1f9e5d26a71c3f807dbb8958c93513 Mon Sep 17 00:00:00 2001
From: Viresh Kumar <viresh.kumar@linaro.org>
Date: Sat, 18 Jul 2015 11:31:01 +0530
Subject: [PATCH 14/53] cpufreq: governor: Avoid invalid states with additional
 checks

There can be races where the request has come to a wrong state. For
example INIT followed by STOP (instead of START) or START followed by
EXIT (instead of STOP).

Address these races by making sure the state-machine never gets into
any invalid state. Also return an error if an invalid state-transition
is requested.

Reviewed-and-tested-by: Preeti U Murthy <preeti@linux.vnet.ibm.com>
Signed-off-by: Viresh Kumar <viresh.kumar@linaro.org>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 drivers/cpufreq/cpufreq_governor.c | 46 +++++++++++++++++++++++-------
 1 file changed, 35 insertions(+), 11 deletions(-)

diff --git a/drivers/cpufreq/cpufreq_governor.c b/drivers/cpufreq/cpufreq_governor.c
index 7ed0ec2ac853..f225dc975415 100644
--- a/drivers/cpufreq/cpufreq_governor.c
+++ b/drivers/cpufreq/cpufreq_governor.c
@@ -305,6 +305,10 @@ static int cpufreq_governor_init(struct cpufreq_policy *policy,
 	unsigned int latency;
 	int ret;
 
+	/* State should be equivalent to EXIT */
+	if (policy->governor_data)
+		return -EBUSY;
+
 	if (dbs_data) {
 		if (WARN_ON(have_governor_per_policy()))
 			return -EINVAL;
@@ -375,10 +379,15 @@ free_dbs_data:
 	return ret;
 }
 
-static void cpufreq_governor_exit(struct cpufreq_policy *policy,
-				  struct dbs_data *dbs_data)
+static int cpufreq_governor_exit(struct cpufreq_policy *policy,
+				 struct dbs_data *dbs_data)
 {
 	struct common_dbs_data *cdata = dbs_data->cdata;
+	struct cpu_dbs_info *cdbs = cdata->get_cpu_cdbs(policy->cpu);
+
+	/* State should be equivalent to INIT */
+	if (!cdbs->shared || cdbs->shared->policy)
+		return -EBUSY;
 
 	policy->governor_data = NULL;
 	if (!--dbs_data->usage_count) {
@@ -395,6 +404,7 @@ static void cpufreq_governor_exit(struct cpufreq_policy *policy,
 	}
 
 	free_common_dbs_info(policy, cdata);
+	return 0;
 }
 
 static int cpufreq_governor_start(struct cpufreq_policy *policy,
@@ -409,6 +419,10 @@ static int cpufreq_governor_start(struct cpufreq_policy *policy,
 	if (!policy->cur)
 		return -EINVAL;
 
+	/* State should be equivalent to INIT */
+	if (!shared || shared->policy)
+		return -EBUSY;
+
 	if (cdata->governor == GOV_CONSERVATIVE) {
 		struct cs_dbs_tuners *cs_tuners = dbs_data->tuners;
 
@@ -465,14 +479,18 @@ static int cpufreq_governor_start(struct cpufreq_policy *policy,
 	return 0;
 }
 
-static void cpufreq_governor_stop(struct cpufreq_policy *policy,
-				  struct dbs_data *dbs_data)
+static int cpufreq_governor_stop(struct cpufreq_policy *policy,
+				 struct dbs_data *dbs_data)
 {
 	struct common_dbs_data *cdata = dbs_data->cdata;
 	unsigned int cpu = policy->cpu;
 	struct cpu_dbs_info *cdbs = cdata->get_cpu_cdbs(cpu);
 	struct cpu_common_dbs_info *shared = cdbs->shared;
 
+	/* State should be equivalent to START */
+	if (!shared || !shared->policy)
+		return -EBUSY;
+
 	gov_cancel_work(dbs_data, policy);
 
 	if (cdata->governor == GOV_CONSERVATIVE) {
@@ -484,17 +502,19 @@ static void cpufreq_governor_stop(struct cpufreq_policy *policy,
 
 	shared->policy = NULL;
 	mutex_destroy(&shared->timer_mutex);
+	return 0;
 }
 
-static void cpufreq_governor_limits(struct cpufreq_policy *policy,
-				    struct dbs_data *dbs_data)
+static int cpufreq_governor_limits(struct cpufreq_policy *policy,
+				   struct dbs_data *dbs_data)
 {
 	struct common_dbs_data *cdata = dbs_data->cdata;
 	unsigned int cpu = policy->cpu;
 	struct cpu_dbs_info *cdbs = cdata->get_cpu_cdbs(cpu);
 
+	/* State should be equivalent to START */
 	if (!cdbs->shared || !cdbs->shared->policy)
-		return;
+		return -EBUSY;
 
 	mutex_lock(&cdbs->shared->timer_mutex);
 	if (policy->max < cdbs->shared->policy->cur)
@@ -505,13 +525,15 @@ static void cpufreq_governor_limits(struct cpufreq_policy *policy,
 					CPUFREQ_RELATION_L);
 	dbs_check_cpu(dbs_data, cpu);
 	mutex_unlock(&cdbs->shared->timer_mutex);
+
+	return 0;
 }
 
 int cpufreq_governor_dbs(struct cpufreq_policy *policy,
 			 struct common_dbs_data *cdata, unsigned int event)
 {
 	struct dbs_data *dbs_data;
-	int ret = 0;
+	int ret;
 
 	/* Lock governor to block concurrent initialization of governor */
 	mutex_lock(&cdata->mutex);
@@ -531,17 +553,19 @@ int cpufreq_governor_dbs(struct cpufreq_policy *policy,
 		ret = cpufreq_governor_init(policy, dbs_data, cdata);
 		break;
 	case CPUFREQ_GOV_POLICY_EXIT:
-		cpufreq_governor_exit(policy, dbs_data);
+		ret = cpufreq_governor_exit(policy, dbs_data);
 		break;
 	case CPUFREQ_GOV_START:
 		ret = cpufreq_governor_start(policy, dbs_data);
 		break;
 	case CPUFREQ_GOV_STOP:
-		cpufreq_governor_stop(policy, dbs_data);
+		ret = cpufreq_governor_stop(policy, dbs_data);
 		break;
 	case CPUFREQ_GOV_LIMITS:
-		cpufreq_governor_limits(policy, dbs_data);
+		ret = cpufreq_governor_limits(policy, dbs_data);
 		break;
+	default:
+		ret = -EINVAL;
 	}
 
 unlock:

From 871ef3b53a2f4dd9be348c07b77df3c4bd74a37f Mon Sep 17 00:00:00 2001
From: Viresh Kumar <viresh.kumar@linaro.org>
Date: Sat, 18 Jul 2015 11:31:02 +0530
Subject: [PATCH 15/53] cpufreq: governor: Don't WARN on invalid states

With previous commit, governors have started to return errors on invalid
state-transition requests. We already have a WARN for an invalid
state-transition request in cpufreq_governor_dbs(). This does trigger
today, as the sequence of events isn't guaranteed by cpufreq core.

Lets stop warning on that for now, and make sure we don't enter an
invalid state.

Reviewed-and-tested-by: Preeti U Murthy <preeti@linux.vnet.ibm.com>
Signed-off-by: Viresh Kumar <viresh.kumar@linaro.org>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 drivers/cpufreq/cpufreq_governor.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/cpufreq/cpufreq_governor.c b/drivers/cpufreq/cpufreq_governor.c
index f225dc975415..939197ffa4ac 100644
--- a/drivers/cpufreq/cpufreq_governor.c
+++ b/drivers/cpufreq/cpufreq_governor.c
@@ -543,7 +543,7 @@ int cpufreq_governor_dbs(struct cpufreq_policy *policy,
 	else
 		dbs_data = cdata->gdbs_data;
 
-	if (WARN_ON(!dbs_data && (event != CPUFREQ_GOV_POLICY_INIT))) {
+	if (!dbs_data && (event != CPUFREQ_GOV_POLICY_INIT)) {
 		ret = -EINVAL;
 		goto unlock;
 	}

From 4bc384ae6299d3f3a948efabda2a423e2a293ee0 Mon Sep 17 00:00:00 2001
From: Viresh Kumar <viresh.kumar@linaro.org>
Date: Sat, 18 Jul 2015 11:31:03 +0530
Subject: [PATCH 16/53] cpufreq: propagate errors returned from
 __cpufreq_governor()

Return codes aren't honored properly in cpufreq_set_policy(). This can
lead to two problems:
- wrong errors propagated to sysfs
- we try to do next state-change even if the previous one failed

cpufreq_governor_dbs() now returns proper errors on all invalid
state-transition requests and this code should honor that.

Reviewed-and-tested-by: Preeti U Murthy <preeti@linux.vnet.ibm.com>
Signed-off-by: Viresh Kumar <viresh.kumar@linaro.org>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 drivers/cpufreq/cpufreq.c | 31 ++++++++++++++++++++++++-------
 1 file changed, 24 insertions(+), 7 deletions(-)

diff --git a/drivers/cpufreq/cpufreq.c b/drivers/cpufreq/cpufreq.c
index a7b6ac6e048e..a3e8fb61cbcc 100644
--- a/drivers/cpufreq/cpufreq.c
+++ b/drivers/cpufreq/cpufreq.c
@@ -2295,16 +2295,31 @@ static int cpufreq_set_policy(struct cpufreq_policy *policy,
 	old_gov = policy->governor;
 	/* end old governor */
 	if (old_gov) {
-		__cpufreq_governor(policy, CPUFREQ_GOV_STOP);
+		ret = __cpufreq_governor(policy, CPUFREQ_GOV_STOP);
+		if (ret) {
+			/* This can happen due to race with other operations */
+			pr_debug("%s: Failed to Stop Governor: %s (%d)\n",
+				 __func__, old_gov->name, ret);
+			return ret;
+		}
+
 		up_write(&policy->rwsem);
-		__cpufreq_governor(policy, CPUFREQ_GOV_POLICY_EXIT);
+		ret = __cpufreq_governor(policy, CPUFREQ_GOV_POLICY_EXIT);
 		down_write(&policy->rwsem);
+
+		if (ret) {
+			pr_err("%s: Failed to Exit Governor: %s (%d)\n",
+			       __func__, old_gov->name, ret);
+			return ret;
+		}
 	}
 
 	/* start new governor */
 	policy->governor = new_policy->governor;
-	if (!__cpufreq_governor(policy, CPUFREQ_GOV_POLICY_INIT)) {
-		if (!__cpufreq_governor(policy, CPUFREQ_GOV_START))
+	ret = __cpufreq_governor(policy, CPUFREQ_GOV_POLICY_INIT);
+	if (!ret) {
+		ret = __cpufreq_governor(policy, CPUFREQ_GOV_START);
+		if (!ret)
 			goto out;
 
 		up_write(&policy->rwsem);
@@ -2316,11 +2331,13 @@ static int cpufreq_set_policy(struct cpufreq_policy *policy,
 	pr_debug("starting governor %s failed\n", policy->governor->name);
 	if (old_gov) {
 		policy->governor = old_gov;
-		__cpufreq_governor(policy, CPUFREQ_GOV_POLICY_INIT);
-		__cpufreq_governor(policy, CPUFREQ_GOV_START);
+		if (__cpufreq_governor(policy, CPUFREQ_GOV_POLICY_INIT))
+			policy->governor = NULL;
+		else
+			__cpufreq_governor(policy, CPUFREQ_GOV_START);
 	}
 
-	return -EINVAL;
+	return ret;
 
  out:
 	pr_debug("governor: change or update limits\n");

From b2f8dc4ce6626e25b164e29cf72b70230a1f1711 Mon Sep 17 00:00:00 2001
From: "Rafael J. Wysocki" <rafael.j.wysocki@intel.com>
Date: Wed, 22 Jul 2015 22:11:16 +0200
Subject: [PATCH 17/53] ACPI / processor: Drop an unused argument of a cleanup
 routine

acpi_processor_unregister_performance() actually doesn't use its
first argument, so drop it and update the callers accordingly.

Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
Acked-by: Viresh Kumar <viresh.kumar@linaro.org>
---
 drivers/acpi/processor_perflib.c    |  4 +---
 drivers/cpufreq/acpi-cpufreq.c      |  5 ++---
 drivers/cpufreq/e_powersaver.c      |  2 +-
 drivers/cpufreq/ia64-acpi-cpufreq.c |  5 ++---
 drivers/cpufreq/powernow-k7.c       |  4 ++--
 drivers/cpufreq/powernow-k8.c       |  5 ++---
 drivers/xen/xen-acpi-processor.c    | 16 ++++++----------
 include/acpi/processor.h            |  5 +----
 8 files changed, 17 insertions(+), 29 deletions(-)

diff --git a/drivers/acpi/processor_perflib.c b/drivers/acpi/processor_perflib.c
index cfc8aba72f86..36b6da2918a6 100644
--- a/drivers/acpi/processor_perflib.c
+++ b/drivers/acpi/processor_perflib.c
@@ -784,9 +784,7 @@ acpi_processor_register_performance(struct acpi_processor_performance
 
 EXPORT_SYMBOL(acpi_processor_register_performance);
 
-void
-acpi_processor_unregister_performance(struct acpi_processor_performance
-				      *performance, unsigned int cpu)
+void acpi_processor_unregister_performance(unsigned int cpu)
 {
 	struct acpi_processor *pr;
 
diff --git a/drivers/cpufreq/acpi-cpufreq.c b/drivers/cpufreq/acpi-cpufreq.c
index de54ce14eb39..ca9d0f6edf08 100644
--- a/drivers/cpufreq/acpi-cpufreq.c
+++ b/drivers/cpufreq/acpi-cpufreq.c
@@ -844,7 +844,7 @@ static int acpi_cpufreq_cpu_init(struct cpufreq_policy *policy)
 err_freqfree:
 	kfree(data->freq_table);
 err_unreg:
-	acpi_processor_unregister_performance(perf, cpu);
+	acpi_processor_unregister_performance(cpu);
 err_free_mask:
 	free_cpumask_var(data->freqdomain_cpus);
 err_free:
@@ -862,8 +862,7 @@ static int acpi_cpufreq_cpu_exit(struct cpufreq_policy *policy)
 
 	if (data) {
 		policy->driver_data = NULL;
-		acpi_processor_unregister_performance(data->acpi_data,
-						      data->acpi_perf_cpu);
+		acpi_processor_unregister_performance(data->acpi_perf_cpu);
 		free_cpumask_var(data->freqdomain_cpus);
 		kfree(data->freq_table);
 		kfree(data);
diff --git a/drivers/cpufreq/e_powersaver.c b/drivers/cpufreq/e_powersaver.c
index a0d2a423cea9..4085244c8a67 100644
--- a/drivers/cpufreq/e_powersaver.c
+++ b/drivers/cpufreq/e_powersaver.c
@@ -78,7 +78,7 @@ static int eps_acpi_init(void)
 static int eps_acpi_exit(struct cpufreq_policy *policy)
 {
 	if (eps_acpi_cpu_perf) {
-		acpi_processor_unregister_performance(eps_acpi_cpu_perf, 0);
+		acpi_processor_unregister_performance(0);
 		free_cpumask_var(eps_acpi_cpu_perf->shared_cpu_map);
 		kfree(eps_acpi_cpu_perf);
 		eps_acpi_cpu_perf = NULL;
diff --git a/drivers/cpufreq/ia64-acpi-cpufreq.c b/drivers/cpufreq/ia64-acpi-cpufreq.c
index c30aaa6a54e8..a9c193286ef4 100644
--- a/drivers/cpufreq/ia64-acpi-cpufreq.c
+++ b/drivers/cpufreq/ia64-acpi-cpufreq.c
@@ -313,7 +313,7 @@ acpi_cpufreq_cpu_init (
  err_freqfree:
 	kfree(data->freq_table);
  err_unreg:
-	acpi_processor_unregister_performance(&data->acpi_data, cpu);
+	acpi_processor_unregister_performance(cpu);
  err_free:
 	kfree(data);
 	acpi_io_data[cpu] = NULL;
@@ -332,8 +332,7 @@ acpi_cpufreq_cpu_exit (
 
 	if (data) {
 		acpi_io_data[policy->cpu] = NULL;
-		acpi_processor_unregister_performance(&data->acpi_data,
-		                                      policy->cpu);
+		acpi_processor_unregister_performance(policy->cpu);
 		kfree(data);
 	}
 
diff --git a/drivers/cpufreq/powernow-k7.c b/drivers/cpufreq/powernow-k7.c
index 37c5742482d8..c1ae1999770a 100644
--- a/drivers/cpufreq/powernow-k7.c
+++ b/drivers/cpufreq/powernow-k7.c
@@ -421,7 +421,7 @@ static int powernow_acpi_init(void)
 	return 0;
 
 err2:
-	acpi_processor_unregister_performance(acpi_processor_perf, 0);
+	acpi_processor_unregister_performance(0);
 err1:
 	free_cpumask_var(acpi_processor_perf->shared_cpu_map);
 err05:
@@ -661,7 +661,7 @@ static int powernow_cpu_exit(struct cpufreq_policy *policy)
 {
 #ifdef CONFIG_X86_POWERNOW_K7_ACPI
 	if (acpi_processor_perf) {
-		acpi_processor_unregister_performance(acpi_processor_perf, 0);
+		acpi_processor_unregister_performance(0);
 		free_cpumask_var(acpi_processor_perf->shared_cpu_map);
 		kfree(acpi_processor_perf);
 	}
diff --git a/drivers/cpufreq/powernow-k8.c b/drivers/cpufreq/powernow-k8.c
index 5c035d04d827..0b5bf135b090 100644
--- a/drivers/cpufreq/powernow-k8.c
+++ b/drivers/cpufreq/powernow-k8.c
@@ -795,7 +795,7 @@ err_out_mem:
 	kfree(powernow_table);
 
 err_out:
-	acpi_processor_unregister_performance(&data->acpi_data, data->cpu);
+	acpi_processor_unregister_performance(data->cpu);
 
 	/* data->acpi_data.state_count informs us at ->exit()
 	 * whether ACPI was used */
@@ -863,8 +863,7 @@ static int fill_powernow_table_fidvid(struct powernow_k8_data *data,
 static void powernow_k8_cpu_exit_acpi(struct powernow_k8_data *data)
 {
 	if (data->acpi_data.state_count)
-		acpi_processor_unregister_performance(&data->acpi_data,
-				data->cpu);
+		acpi_processor_unregister_performance(data->cpu);
 	free_cpumask_var(data->acpi_data.shared_cpu_map);
 }
 
diff --git a/drivers/xen/xen-acpi-processor.c b/drivers/xen/xen-acpi-processor.c
index 59fc190f1e92..70fa438000af 100644
--- a/drivers/xen/xen-acpi-processor.c
+++ b/drivers/xen/xen-acpi-processor.c
@@ -560,11 +560,9 @@ static int __init xen_acpi_processor_init(void)
 
 	return 0;
 err_unregister:
-	for_each_possible_cpu(i) {
-		struct acpi_processor_performance *perf;
-		perf = per_cpu_ptr(acpi_perf_data, i);
-		acpi_processor_unregister_performance(perf, i);
-	}
+	for_each_possible_cpu(i)
+		acpi_processor_unregister_performance(i);
+
 err_out:
 	/* Freeing a NULL pointer is OK: alloc_percpu zeroes. */
 	free_acpi_perf_data();
@@ -579,11 +577,9 @@ static void __exit xen_acpi_processor_exit(void)
 	kfree(acpi_ids_done);
 	kfree(acpi_id_present);
 	kfree(acpi_id_cst_present);
-	for_each_possible_cpu(i) {
-		struct acpi_processor_performance *perf;
-		perf = per_cpu_ptr(acpi_perf_data, i);
-		acpi_processor_unregister_performance(perf, i);
-	}
+	for_each_possible_cpu(i)
+		acpi_processor_unregister_performance(i);
+
 	free_acpi_perf_data();
 }
 
diff --git a/include/acpi/processor.h b/include/acpi/processor.h
index 4188a4d3b597..aad1f2a3cd2b 100644
--- a/include/acpi/processor.h
+++ b/include/acpi/processor.h
@@ -228,10 +228,7 @@ extern int acpi_processor_preregister_performance(struct
 
 extern int acpi_processor_register_performance(struct acpi_processor_performance
 					       *performance, unsigned int cpu);
-extern void acpi_processor_unregister_performance(struct
-						  acpi_processor_performance
-						  *performance,
-						  unsigned int cpu);
+extern void acpi_processor_unregister_performance(unsigned int cpu);
 
 /* note: this locks both the calling module and the processor module
          if a _PPC object exists, rmmod is disallowed then */

From 3427616b2a5aa7f34716278aa6a140378a00a36e Mon Sep 17 00:00:00 2001
From: "Rafael J. Wysocki" <rafael.j.wysocki@intel.com>
Date: Wed, 22 Jul 2015 22:11:56 +0200
Subject: [PATCH 18/53] cpufreq: acpi-cpufreq: Drop acpi_data from struct
 acpi_cpufreq_data

After commit 8cfcfd39000d (acpi-cpufreq: Fix an ACPI perf unregister
issue) we store both a pointer to per-CPU data of the first policy
CPU and the number of that CPU which are redundant.

Since the CPU number has to be stored anyway for the unregistration,
the pointer to the CPU's per-CPU data may be dropped and we can
access the data in question via per_cpu_ptr().

Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
Acked-by: Viresh Kumar <viresh.kumar@linaro.org>
---
 drivers/cpufreq/acpi-cpufreq.c | 29 ++++++++++++++++-------------
 1 file changed, 16 insertions(+), 13 deletions(-)

diff --git a/drivers/cpufreq/acpi-cpufreq.c b/drivers/cpufreq/acpi-cpufreq.c
index ca9d0f6edf08..af2bb8883d96 100644
--- a/drivers/cpufreq/acpi-cpufreq.c
+++ b/drivers/cpufreq/acpi-cpufreq.c
@@ -65,7 +65,6 @@ enum {
 #define MSR_K7_HWCR_CPB_DIS	(1ULL << 25)
 
 struct acpi_cpufreq_data {
-	struct acpi_processor_performance *acpi_data;
 	struct cpufreq_frequency_table *freq_table;
 	unsigned int resume;
 	unsigned int cpu_feature;
@@ -76,6 +75,11 @@ struct acpi_cpufreq_data {
 /* acpi_perf_data is a pointer to percpu data. */
 static struct acpi_processor_performance __percpu *acpi_perf_data;
 
+static inline struct acpi_processor_performance *to_perf_data(struct acpi_cpufreq_data *data)
+{
+	return per_cpu_ptr(acpi_perf_data, data->acpi_perf_cpu);
+}
+
 static struct cpufreq_driver acpi_cpufreq_driver;
 
 static unsigned int acpi_pstate_strict;
@@ -201,7 +205,7 @@ static unsigned extract_io(u32 value, struct acpi_cpufreq_data *data)
 	struct acpi_processor_performance *perf;
 	int i;
 
-	perf = data->acpi_data;
+	perf = to_perf_data(data);
 
 	for (i = 0; i < perf->state_count; i++) {
 		if (value == perf->states[i].status)
@@ -220,7 +224,7 @@ static unsigned extract_msr(u32 msr, struct acpi_cpufreq_data *data)
 	else
 		msr &= INTEL_MSR_RANGE;
 
-	perf = data->acpi_data;
+	perf = to_perf_data(data);
 
 	cpufreq_for_each_entry(pos, data->freq_table)
 		if (msr == perf->states[pos->driver_data].status)
@@ -346,7 +350,7 @@ get_cur_val(const struct cpumask *mask, struct acpi_cpufreq_data *data)
 		break;
 	case SYSTEM_IO_CAPABLE:
 		cmd.type = SYSTEM_IO_CAPABLE;
-		perf = data->acpi_data;
+		perf = to_perf_data(data);
 		cmd.addr.io.port = perf->control_register.address;
 		cmd.addr.io.bit_width = perf->control_register.bit_width;
 		break;
@@ -377,10 +381,10 @@ static unsigned int get_cur_freq_on_cpu(unsigned int cpu)
 
 	data = policy->driver_data;
 	cpufreq_cpu_put(policy);
-	if (unlikely(!data || !data->acpi_data || !data->freq_table))
+	if (unlikely(!data || !data->freq_table))
 		return 0;
 
-	cached_freq = data->freq_table[data->acpi_data->state].frequency;
+	cached_freq = data->freq_table[to_perf_data(data)->state].frequency;
 	freq = extract_freq(get_cur_val(cpumask_of(cpu), data), data);
 	if (freq != cached_freq) {
 		/*
@@ -419,12 +423,11 @@ static int acpi_cpufreq_target(struct cpufreq_policy *policy,
 	unsigned int next_perf_state = 0; /* Index into perf table */
 	int result = 0;
 
-	if (unlikely(data == NULL ||
-	     data->acpi_data == NULL || data->freq_table == NULL)) {
+	if (unlikely(data == NULL || data->freq_table == NULL)) {
 		return -ENODEV;
 	}
 
-	perf = data->acpi_data;
+	perf = to_perf_data(data);
 	next_perf_state = data->freq_table[index].driver_data;
 	if (perf->state == next_perf_state) {
 		if (unlikely(data->resume)) {
@@ -487,8 +490,9 @@ out:
 static unsigned long
 acpi_cpufreq_guess_freq(struct acpi_cpufreq_data *data, unsigned int cpu)
 {
-	struct acpi_processor_performance *perf = data->acpi_data;
+	struct acpi_processor_performance *perf;
 
+	perf = to_perf_data(data);
 	if (cpu_khz) {
 		/* search the closest match to cpu_khz */
 		unsigned int i;
@@ -677,18 +681,17 @@ static int acpi_cpufreq_cpu_init(struct cpufreq_policy *policy)
 		goto err_free;
 	}
 
-	data->acpi_data = per_cpu_ptr(acpi_perf_data, cpu);
+	perf = per_cpu_ptr(acpi_perf_data, cpu);
 	data->acpi_perf_cpu = cpu;
 	policy->driver_data = data;
 
 	if (cpu_has(c, X86_FEATURE_CONSTANT_TSC))
 		acpi_cpufreq_driver.flags |= CPUFREQ_CONST_LOOPS;
 
-	result = acpi_processor_register_performance(data->acpi_data, cpu);
+	result = acpi_processor_register_performance(perf, cpu);
 	if (result)
 		goto err_free_mask;
 
-	perf = data->acpi_data;
 	policy->shared_type = perf->shared_type;
 
 	/*

From f56c50e322eed07526a08edefa29fc8bab1e93df Mon Sep 17 00:00:00 2001
From: "Rafael J. Wysocki" <rafael.j.wysocki@intel.com>
Date: Wed, 22 Jul 2015 22:12:10 +0200
Subject: [PATCH 19/53] cpufreq: acpi-cpufreq: Fix up the handling of cpb sysfs
 attribute

The cpb sysfs attribute is only exposed by the ACPI cpufreq driver
after a runtime check.  For this purpose, the driver keeps a NULL
placeholder in its table of sysfs attributes and replaces the NULL
with a pointer to an attribute structure if it decides to expose
cpb.

That is confusing, so make the driver set the pointer to the cpb
attribute structure upfront and replace it with NULL if the
attribute should not be exposed instead.

Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
Acked-by: Viresh Kumar <viresh.kumar@linaro.org>
---
 drivers/cpufreq/acpi-cpufreq.c | 21 +++++++++++----------
 1 file changed, 11 insertions(+), 10 deletions(-)

diff --git a/drivers/cpufreq/acpi-cpufreq.c b/drivers/cpufreq/acpi-cpufreq.c
index af2bb8883d96..15b921a9248c 100644
--- a/drivers/cpufreq/acpi-cpufreq.c
+++ b/drivers/cpufreq/acpi-cpufreq.c
@@ -888,7 +888,9 @@ static int acpi_cpufreq_resume(struct cpufreq_policy *policy)
 static struct freq_attr *acpi_cpufreq_attr[] = {
 	&cpufreq_freq_attr_scaling_available_freqs,
 	&freqdomain_cpus,
-	NULL,	/* this is a placeholder for cpb, do not remove */
+#ifdef CONFIG_X86_ACPI_CPUFREQ_CPB
+	&cpb,
+#endif
 	NULL,
 };
 
@@ -961,17 +963,16 @@ static int __init acpi_cpufreq_init(void)
 	 * only if configured. This is considered legacy code, which
 	 * will probably be removed at some point in the future.
 	 */
-	if (check_amd_hwpstate_cpu(0)) {
-		struct freq_attr **iter;
+	if (!check_amd_hwpstate_cpu(0)) {
+		struct freq_attr **attr;
 
-		pr_debug("adding sysfs entry for cpb\n");
+		pr_debug("CPB unsupported, do not expose it\n");
 
-		for (iter = acpi_cpufreq_attr; *iter != NULL; iter++)
-			;
-
-		/* make sure there is a terminator behind it */
-		if (iter[1] == NULL)
-			*iter = &cpb;
+		for (attr = acpi_cpufreq_attr; *attr; attr++)
+			if (*attr == &cpb) {
+				*attr = NULL;
+				break;
+			}
 	}
 #endif
 	acpi_cpufreq_boost_init();

From 946c14f812bfff18e6fd6357d06b6e8fa8793fec Mon Sep 17 00:00:00 2001
From: Pan Xinhui <xinhuix.pan@intel.com>
Date: Mon, 20 Jul 2015 14:22:46 +0800
Subject: [PATCH 20/53] cpufreq: ia64: remove redundant freq_table of
 acpi_cpufreq_data

freq_table is now stored as policy->freq_table, so drop the redundant
freq_table from struct cpufreq_acpi_io.

Signed-off-by: Pan Xinhui <xinhuix.pan@intel.com>
Acked-by: Viresh Kumar <viresh.kumar@linaro.org>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 drivers/cpufreq/ia64-acpi-cpufreq.c | 14 +++++++-------
 1 file changed, 7 insertions(+), 7 deletions(-)

diff --git a/drivers/cpufreq/ia64-acpi-cpufreq.c b/drivers/cpufreq/ia64-acpi-cpufreq.c
index a9c193286ef4..cab8ab675bbc 100644
--- a/drivers/cpufreq/ia64-acpi-cpufreq.c
+++ b/drivers/cpufreq/ia64-acpi-cpufreq.c
@@ -29,7 +29,6 @@ MODULE_LICENSE("GPL");
 
 struct cpufreq_acpi_io {
 	struct acpi_processor_performance	acpi_data;
-	struct cpufreq_frequency_table		*freq_table;
 	unsigned int				resume;
 };
 
@@ -221,6 +220,7 @@ acpi_cpufreq_cpu_init (
 	unsigned int		cpu = policy->cpu;
 	struct cpufreq_acpi_io	*data;
 	unsigned int		result = 0;
+	struct cpufreq_frequency_table *freq_table;
 
 	pr_debug("acpi_cpufreq_cpu_init\n");
 
@@ -254,10 +254,10 @@ acpi_cpufreq_cpu_init (
 	}
 
 	/* alloc freq_table */
-	data->freq_table = kzalloc(sizeof(*data->freq_table) *
+	freq_table = kzalloc(sizeof(*freq_table) *
 	                           (data->acpi_data.state_count + 1),
 	                           GFP_KERNEL);
-	if (!data->freq_table) {
+	if (!freq_table) {
 		result = -ENOMEM;
 		goto err_unreg;
 	}
@@ -276,14 +276,14 @@ acpi_cpufreq_cpu_init (
 	for (i = 0; i <= data->acpi_data.state_count; i++)
 	{
 		if (i < data->acpi_data.state_count) {
-			data->freq_table[i].frequency =
+			freq_table[i].frequency =
 			      data->acpi_data.states[i].core_frequency * 1000;
 		} else {
-			data->freq_table[i].frequency = CPUFREQ_TABLE_END;
+			freq_table[i].frequency = CPUFREQ_TABLE_END;
 		}
 	}
 
-	result = cpufreq_table_validate_and_show(policy, data->freq_table);
+	result = cpufreq_table_validate_and_show(policy, freq_table);
 	if (result) {
 		goto err_freqfree;
 	}
@@ -311,7 +311,7 @@ acpi_cpufreq_cpu_init (
 	return (result);
 
  err_freqfree:
-	kfree(data->freq_table);
+	kfree(freq_table);
  err_unreg:
 	acpi_processor_unregister_performance(cpu);
  err_free:

From 555f3fe957b5bd763d49719cc68c6435c9c8dcf1 Mon Sep 17 00:00:00 2001
From: Pan Xinhui <xinhuix.pan@intel.com>
Date: Mon, 20 Jul 2015 14:24:36 +0800
Subject: [PATCH 21/53] cpufreq: ia64: Fix a memory leak in
 acpi_cpufreq_cpu_exit()

freq_table should be alloced in ->init and freed in ->exit, but it
it is not freed.  Fix this memory leak in acpi_cpufreq_cpu_exit().

Signed-off-by: Pan Xinhui <xinhuix.pan@intel.com>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 drivers/cpufreq/ia64-acpi-cpufreq.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/cpufreq/ia64-acpi-cpufreq.c b/drivers/cpufreq/ia64-acpi-cpufreq.c
index cab8ab675bbc..0202429f1c5b 100644
--- a/drivers/cpufreq/ia64-acpi-cpufreq.c
+++ b/drivers/cpufreq/ia64-acpi-cpufreq.c
@@ -333,6 +333,7 @@ acpi_cpufreq_cpu_exit (
 	if (data) {
 		acpi_io_data[policy->cpu] = NULL;
 		acpi_processor_unregister_performance(policy->cpu);
+		kfree(policy->freq_table);
 		kfree(data);
 	}
 

From 454d3a2500a4eb33be85dde3bfba9e5f6b5efadc Mon Sep 17 00:00:00 2001
From: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Date: Wed, 22 Jul 2015 17:59:11 +0200
Subject: [PATCH 22/53] cpufreq: Remove cpufreq_rwsem

cpufreq_rwsem was introduced in commit 6eed9404ab3c4 ("cpufreq: Use
rwsem for protecting critical sections) in order to replace
try_module_get() on the cpu-freq driver. That try_module_get() worked
well until the refcount was so heavily used that module removal became
more or less impossible.

Though when looking at the various (undocumented) protection
mechanisms in that code, the randomly sprinkeled around cpufreq_rwsem
locking sites are superfluous.

The policy, which is acquired in cpufreq_cpu_get() and released in
cpufreq_cpu_put() is sufficiently protected already.

  cpufreq_cpu_get(cpu)
    /* Protects against concurrent driver removal */
    read_lock_irqsave(&cpufreq_driver_lock, flags);
    policy = per_cpu(cpufreq_cpu_data, cpu);
    kobject_get(&policy->kobj);
    read_unlock_irqrestore(&cpufreq_driver_lock, flags);

The reference on the policy serializes versus module unload already:

  cpufreq_unregister_driver()
    subsys_interface_unregister()
      __cpufreq_remove_dev_finish()
        per_cpu(cpufreq_cpu_data) = NULL;
	cpufreq_policy_put_kobj()

If there is a reference held on the policy, i.e. obtained prior to the
unregister call, then cpufreq_policy_put_kobj() will wait until that
reference is dropped. So once subsys_interface_unregister() returns
there is no policy pointer in flight and no new reference can be
obtained. So that rwsem protection is useless.

The other usage of cpufreq_rwsem in show()/store() of the sysfs
interface is redundant as well because sysfs already does the proper
kobject_get()/put() pairs.

That leaves CPU hotplug versus module removal. The current
down_write() around the write_lock() in cpufreq_unregister_driver() is
silly at best as it protects actually nothing.

The trivial solution to this is to prevent hotplug across
cpufreq_unregister_driver completely.

Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 drivers/cpufreq/cpufreq.c | 41 +++------------------------------------
 1 file changed, 3 insertions(+), 38 deletions(-)

diff --git a/drivers/cpufreq/cpufreq.c b/drivers/cpufreq/cpufreq.c
index a3e8fb61cbcc..febda462681c 100644
--- a/drivers/cpufreq/cpufreq.c
+++ b/drivers/cpufreq/cpufreq.c
@@ -112,12 +112,6 @@ static inline bool has_target(void)
 	return cpufreq_driver->target_index || cpufreq_driver->target;
 }
 
-/*
- * rwsem to guarantee that cpufreq driver module doesn't unload during critical
- * sections
- */
-static DECLARE_RWSEM(cpufreq_rwsem);
-
 /* internal prototypes */
 static int __cpufreq_governor(struct cpufreq_policy *policy,
 		unsigned int event);
@@ -277,10 +271,6 @@ EXPORT_SYMBOL_GPL(cpufreq_generic_get);
  * If corresponding call cpufreq_cpu_put() isn't made, the policy wouldn't be
  * freed as that depends on the kobj count.
  *
- * It also takes a read-lock of 'cpufreq_rwsem' and doesn't put it back if a
- * valid policy is found. This is done to make sure the driver doesn't get
- * unregistered while the policy is being used.
- *
  * Return: A valid policy on success, otherwise NULL on failure.
  */
 struct cpufreq_policy *cpufreq_cpu_get(unsigned int cpu)
@@ -291,9 +281,6 @@ struct cpufreq_policy *cpufreq_cpu_get(unsigned int cpu)
 	if (WARN_ON(cpu >= nr_cpu_ids))
 		return NULL;
 
-	if (!down_read_trylock(&cpufreq_rwsem))
-		return NULL;
-
 	/* get the cpufreq driver */
 	read_lock_irqsave(&cpufreq_driver_lock, flags);
 
@@ -306,9 +293,6 @@ struct cpufreq_policy *cpufreq_cpu_get(unsigned int cpu)
 
 	read_unlock_irqrestore(&cpufreq_driver_lock, flags);
 
-	if (!policy)
-		up_read(&cpufreq_rwsem);
-
 	return policy;
 }
 EXPORT_SYMBOL_GPL(cpufreq_cpu_get);
@@ -320,13 +304,10 @@ EXPORT_SYMBOL_GPL(cpufreq_cpu_get);
  *
  * This decrements the kobject reference count incremented earlier by calling
  * cpufreq_cpu_get().
- *
- * It also drops the read-lock of 'cpufreq_rwsem' taken at cpufreq_cpu_get().
  */
 void cpufreq_cpu_put(struct cpufreq_policy *policy)
 {
 	kobject_put(&policy->kobj);
-	up_read(&cpufreq_rwsem);
 }
 EXPORT_SYMBOL_GPL(cpufreq_cpu_put);
 
@@ -851,9 +832,6 @@ static ssize_t show(struct kobject *kobj, struct attribute *attr, char *buf)
 	struct freq_attr *fattr = to_attr(attr);
 	ssize_t ret;
 
-	if (!down_read_trylock(&cpufreq_rwsem))
-		return -EINVAL;
-
 	down_read(&policy->rwsem);
 
 	if (fattr->show)
@@ -862,7 +840,6 @@ static ssize_t show(struct kobject *kobj, struct attribute *attr, char *buf)
 		ret = -EIO;
 
 	up_read(&policy->rwsem);
-	up_read(&cpufreq_rwsem);
 
 	return ret;
 }
@@ -879,9 +856,6 @@ static ssize_t store(struct kobject *kobj, struct attribute *attr,
 	if (!cpu_online(policy->cpu))
 		goto unlock;
 
-	if (!down_read_trylock(&cpufreq_rwsem))
-		goto unlock;
-
 	down_write(&policy->rwsem);
 
 	/* Updating inactive policies is invalid, so avoid doing that. */
@@ -897,8 +871,6 @@ static ssize_t store(struct kobject *kobj, struct attribute *attr,
 
 unlock_policy_rwsem:
 	up_write(&policy->rwsem);
-
-	up_read(&cpufreq_rwsem);
 unlock:
 	put_online_cpus();
 
@@ -1261,15 +1233,11 @@ static int cpufreq_add_dev(struct device *dev, struct subsys_interface *sif)
 	if (cpu_is_offline(cpu))
 		return add_cpu_dev_symlink(per_cpu(cpufreq_cpu_data, cpu), cpu);
 
-	if (!down_read_trylock(&cpufreq_rwsem))
-		return 0;
-
 	/* Check if this CPU already has a policy to manage it */
 	policy = per_cpu(cpufreq_cpu_data, cpu);
 	if (policy && !policy_is_inactive(policy)) {
 		WARN_ON(!cpumask_test_cpu(cpu, policy->related_cpus));
 		ret = cpufreq_add_policy_cpu(policy, cpu, dev);
-		up_read(&cpufreq_rwsem);
 		return ret;
 	}
 
@@ -1395,8 +1363,6 @@ static int cpufreq_add_dev(struct device *dev, struct subsys_interface *sif)
 
 	kobject_uevent(&policy->kobj, KOBJ_ADD);
 
-	up_read(&cpufreq_rwsem);
-
 	/* Callback for handling stuff after policy is ready */
 	if (cpufreq_driver->ready)
 		cpufreq_driver->ready(policy);
@@ -1416,8 +1382,6 @@ out_exit_policy:
 out_free_policy:
 	cpufreq_policy_free(policy, recover_policy);
 out_release_rwsem:
-	up_read(&cpufreq_rwsem);
-
 	return ret;
 }
 
@@ -2604,19 +2568,20 @@ int cpufreq_unregister_driver(struct cpufreq_driver *driver)
 
 	pr_debug("unregistering driver %s\n", driver->name);
 
+	/* Protect against concurrent cpu hotplug */
+	get_online_cpus();
 	subsys_interface_unregister(&cpufreq_interface);
 	if (cpufreq_boost_supported())
 		cpufreq_sysfs_remove_file(&boost.attr);
 
 	unregister_hotcpu_notifier(&cpufreq_cpu_notifier);
 
-	down_write(&cpufreq_rwsem);
 	write_lock_irqsave(&cpufreq_driver_lock, flags);
 
 	cpufreq_driver = NULL;
 
 	write_unlock_irqrestore(&cpufreq_driver_lock, flags);
-	up_write(&cpufreq_rwsem);
+	put_online_cpus();
 
 	return 0;
 }

From 15c0b4d222f83672407419f9c9e167e996d8ad2b Mon Sep 17 00:00:00 2001
From: "Rafael J. Wysocki" <rafael.j.wysocki@intel.com>
Date: Mon, 27 Jul 2015 23:11:09 +0200
Subject: [PATCH 23/53] cpufreq: Rework two functions related to CPU offline

Since __cpufreq_remove_dev_prepare() and __cpufreq_remove_dev_finish()
are about CPU offline rather than about CPU removal, rename them to
cpufreq_offline_prepare() and cpufreq_offline_finish(), respectively.

Also change their argument from a struct device pointer to a CPU
number, because they use the CPU number only internally anyway
and make them void as their return values are ignored.

Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
Acked-by: Viresh Kumar <viresh.kumar@linaro.org>
---
 drivers/cpufreq/cpufreq.c | 32 ++++++++++++--------------------
 1 file changed, 12 insertions(+), 20 deletions(-)

diff --git a/drivers/cpufreq/cpufreq.c b/drivers/cpufreq/cpufreq.c
index 46251e8d30f2..29e3ec979266 100644
--- a/drivers/cpufreq/cpufreq.c
+++ b/drivers/cpufreq/cpufreq.c
@@ -1398,10 +1398,8 @@ out_release_rwsem:
 	return ret;
 }
 
-static int __cpufreq_remove_dev_prepare(struct device *dev)
+static void cpufreq_offline_prepare(unsigned int cpu)
 {
-	unsigned int cpu = dev->id;
-	int ret = 0;
 	struct cpufreq_policy *policy;
 
 	pr_debug("%s: unregistering CPU %u\n", __func__, cpu);
@@ -1409,11 +1407,11 @@ static int __cpufreq_remove_dev_prepare(struct device *dev)
 	policy = cpufreq_cpu_get_raw(cpu);
 	if (!policy) {
 		pr_debug("%s: No cpu_data found\n", __func__);
-		return -EINVAL;
+		return;
 	}
 
 	if (has_target()) {
-		ret = __cpufreq_governor(policy, CPUFREQ_GOV_STOP);
+		int ret = __cpufreq_governor(policy, CPUFREQ_GOV_STOP);
 		if (ret)
 			pr_err("%s: Failed to stop governor\n", __func__);
 	}
@@ -1434,7 +1432,7 @@ static int __cpufreq_remove_dev_prepare(struct device *dev)
 	/* Start governor again for active policy */
 	if (!policy_is_inactive(policy)) {
 		if (has_target()) {
-			ret = __cpufreq_governor(policy, CPUFREQ_GOV_START);
+			int ret = __cpufreq_governor(policy, CPUFREQ_GOV_START);
 			if (!ret)
 				ret = __cpufreq_governor(policy, CPUFREQ_GOV_LIMITS);
 
@@ -1444,28 +1442,24 @@ static int __cpufreq_remove_dev_prepare(struct device *dev)
 	} else if (cpufreq_driver->stop_cpu) {
 		cpufreq_driver->stop_cpu(policy);
 	}
-
-	return ret;
 }
 
-static int __cpufreq_remove_dev_finish(struct device *dev)
+static void cpufreq_offline_finish(unsigned int cpu)
 {
-	unsigned int cpu = dev->id;
-	int ret;
 	struct cpufreq_policy *policy = per_cpu(cpufreq_cpu_data, cpu);
 
 	if (!policy) {
 		pr_debug("%s: No cpu_data found\n", __func__);
-		return -EINVAL;
+		return;
 	}
 
 	/* Only proceed for inactive policies */
 	if (!policy_is_inactive(policy))
-		return 0;
+		return;
 
 	/* If cpu is last user of policy, free policy */
 	if (has_target()) {
-		ret = __cpufreq_governor(policy, CPUFREQ_GOV_POLICY_EXIT);
+		int ret = __cpufreq_governor(policy, CPUFREQ_GOV_POLICY_EXIT);
 		if (ret)
 			pr_err("%s: Failed to exit governor\n", __func__);
 	}
@@ -1477,8 +1471,6 @@ static int __cpufreq_remove_dev_finish(struct device *dev)
 	 */
 	if (cpufreq_driver->exit)
 		cpufreq_driver->exit(policy);
-
-	return 0;
 }
 
 /**
@@ -1495,8 +1487,8 @@ static int cpufreq_remove_dev(struct device *dev, struct subsys_interface *sif)
 		return 0;
 
 	if (cpu_online(cpu)) {
-		__cpufreq_remove_dev_prepare(dev);
-		__cpufreq_remove_dev_finish(dev);
+		cpufreq_offline_prepare(cpu);
+		cpufreq_offline_finish(cpu);
 	}
 
 	cpumask_clear_cpu(cpu, policy->real_cpus);
@@ -2379,11 +2371,11 @@ static int cpufreq_cpu_callback(struct notifier_block *nfb,
 			break;
 
 		case CPU_DOWN_PREPARE:
-			__cpufreq_remove_dev_prepare(dev);
+			cpufreq_offline_prepare(cpu);
 			break;
 
 		case CPU_POST_DEAD:
-			__cpufreq_remove_dev_finish(dev);
+			cpufreq_offline_finish(cpu);
 			break;
 
 		case CPU_DOWN_FAILED:

From 11ce707e6c2aea05e1f54680fb89a8a44ded5db4 Mon Sep 17 00:00:00 2001
From: "Rafael J. Wysocki" <rafael.j.wysocki@intel.com>
Date: Mon, 27 Jul 2015 23:11:21 +0200
Subject: [PATCH 24/53] cpufreq: Drop cpufreq_policy_restore()

Notice that when cpufreq_policy_restore() is called, its per-CPU
cpufreq_cpu_data variable has been already dereferenced and if that
variable is not NULL, the policy local pointer in cpufreq_add_dev()
contains its value.

Therefore it is not necessary to dereference it again and the
policy pointer can be used directly.  Moreover, if that pointer
is not NULL, the policy is inactive (or the previous check would
have made us return from cpufreq_add_dev()) so the restoration
code from cpufreq_policy_restore() can be moved to that point
in cpufreq_add_dev().

Do that and drop cpufreq_policy_restore().

Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
Acked-by: Viresh Kumar <viresh.kumar@linaro.org>
---
 drivers/cpufreq/cpufreq.c | 44 ++++++++++-----------------------------
 1 file changed, 11 insertions(+), 33 deletions(-)

diff --git a/drivers/cpufreq/cpufreq.c b/drivers/cpufreq/cpufreq.c
index 29e3ec979266..3199b8dafd60 100644
--- a/drivers/cpufreq/cpufreq.c
+++ b/drivers/cpufreq/cpufreq.c
@@ -1092,28 +1092,6 @@ static int cpufreq_add_policy_cpu(struct cpufreq_policy *policy,
 	return 0;
 }
 
-static struct cpufreq_policy *cpufreq_policy_restore(unsigned int cpu)
-{
-	struct cpufreq_policy *policy;
-	unsigned long flags;
-
-	read_lock_irqsave(&cpufreq_driver_lock, flags);
-	policy = per_cpu(cpufreq_cpu_data, cpu);
-	read_unlock_irqrestore(&cpufreq_driver_lock, flags);
-
-	if (likely(policy)) {
-		/* Policy should be inactive here */
-		WARN_ON(!policy_is_inactive(policy));
-
-		down_write(&policy->rwsem);
-		policy->cpu = cpu;
-		policy->governor = NULL;
-		up_write(&policy->rwsem);
-	}
-
-	return policy;
-}
-
 static struct cpufreq_policy *cpufreq_policy_alloc(struct device *dev)
 {
 	struct cpufreq_policy *policy;
@@ -1226,7 +1204,7 @@ static int cpufreq_add_dev(struct device *dev, struct subsys_interface *sif)
 	int ret = -ENOMEM;
 	struct cpufreq_policy *policy;
 	unsigned long flags;
-	bool recover_policy = !sif;
+	bool recover_policy;
 
 	pr_debug("adding CPU %u\n", cpu);
 
@@ -1244,18 +1222,18 @@ static int cpufreq_add_dev(struct device *dev, struct subsys_interface *sif)
 
 	/* Check if this CPU already has a policy to manage it */
 	policy = per_cpu(cpufreq_cpu_data, cpu);
-	if (policy && !policy_is_inactive(policy)) {
+	if (policy) {
 		WARN_ON(!cpumask_test_cpu(cpu, policy->related_cpus));
-		ret = cpufreq_add_policy_cpu(policy, cpu, dev);
-		return ret;
-	}
+		if (!policy_is_inactive(policy))
+			return cpufreq_add_policy_cpu(policy, cpu, dev);
 
-	/*
-	 * Restore the saved policy when doing light-weight init and fall back
-	 * to the full init if that fails.
-	 */
-	policy = recover_policy ? cpufreq_policy_restore(cpu) : NULL;
-	if (!policy) {
+		/* This is the only online CPU for the policy.  Start over. */
+		recover_policy = true;
+		down_write(&policy->rwsem);
+		policy->cpu = cpu;
+		policy->governor = NULL;
+		up_write(&policy->rwsem);
+	} else {
 		recover_policy = false;
 		policy = cpufreq_policy_alloc(dev);
 		if (!policy)

From d4d854d6c7706e6a5cda297e350e3626d55e9bc9 Mon Sep 17 00:00:00 2001
From: "Rafael J. Wysocki" <rafael.j.wysocki@intel.com>
Date: Mon, 27 Jul 2015 23:11:30 +0200
Subject: [PATCH 25/53] cpufreq: Drop unnecessary label from cpufreq_add_dev()

The leftover out_release_rwsem label in cpufreq_add_dev() is not
necessary any more and confusing, so drop it.

Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
Acked-by: Viresh Kumar <viresh.kumar@linaro.org>
---
 drivers/cpufreq/cpufreq.c | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/drivers/cpufreq/cpufreq.c b/drivers/cpufreq/cpufreq.c
index 3199b8dafd60..5c80502339a1 100644
--- a/drivers/cpufreq/cpufreq.c
+++ b/drivers/cpufreq/cpufreq.c
@@ -1201,7 +1201,7 @@ static void cpufreq_policy_free(struct cpufreq_policy *policy, bool notify)
 static int cpufreq_add_dev(struct device *dev, struct subsys_interface *sif)
 {
 	unsigned int j, cpu = dev->id;
-	int ret = -ENOMEM;
+	int ret;
 	struct cpufreq_policy *policy;
 	unsigned long flags;
 	bool recover_policy;
@@ -1237,7 +1237,7 @@ static int cpufreq_add_dev(struct device *dev, struct subsys_interface *sif)
 		recover_policy = false;
 		policy = cpufreq_policy_alloc(dev);
 		if (!policy)
-			goto out_release_rwsem;
+			return -ENOMEM;
 	}
 
 	cpumask_copy(policy->cpus, cpumask_of(cpu));
@@ -1372,7 +1372,6 @@ out_exit_policy:
 		cpufreq_driver->exit(policy);
 out_free_policy:
 	cpufreq_policy_free(policy, recover_policy);
-out_release_rwsem:
 	return ret;
 }
 

From d9612a495b0bc93f5db0e0033fe4ee7abb7167c7 Mon Sep 17 00:00:00 2001
From: "Rafael J. Wysocki" <rafael.j.wysocki@intel.com>
Date: Mon, 27 Jul 2015 23:11:37 +0200
Subject: [PATCH 26/53] cpufreq: Drop unused dev argument from two functions

The dev argument of cpufreq_add_policy_cpu() and
cpufreq_add_dev_interface() is not used by any of them,
so drop it.

Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
Acked-by: Viresh Kumar <viresh.kumar@linaro.org>
---
 drivers/cpufreq/cpufreq.c | 10 ++++------
 1 file changed, 4 insertions(+), 6 deletions(-)

diff --git a/drivers/cpufreq/cpufreq.c b/drivers/cpufreq/cpufreq.c
index 5c80502339a1..9d8f8cd64e58 100644
--- a/drivers/cpufreq/cpufreq.c
+++ b/drivers/cpufreq/cpufreq.c
@@ -999,8 +999,7 @@ static void cpufreq_remove_dev_symlink(struct cpufreq_policy *policy)
 	}
 }
 
-static int cpufreq_add_dev_interface(struct cpufreq_policy *policy,
-				     struct device *dev)
+static int cpufreq_add_dev_interface(struct cpufreq_policy *policy)
 {
 	struct freq_attr **drv_attr;
 	int ret = 0;
@@ -1057,8 +1056,7 @@ static int cpufreq_init_policy(struct cpufreq_policy *policy)
 	return cpufreq_set_policy(policy, &new_policy);
 }
 
-static int cpufreq_add_policy_cpu(struct cpufreq_policy *policy,
-				  unsigned int cpu, struct device *dev)
+static int cpufreq_add_policy_cpu(struct cpufreq_policy *policy, unsigned int cpu)
 {
 	int ret = 0;
 
@@ -1225,7 +1223,7 @@ static int cpufreq_add_dev(struct device *dev, struct subsys_interface *sif)
 	if (policy) {
 		WARN_ON(!cpumask_test_cpu(cpu, policy->related_cpus));
 		if (!policy_is_inactive(policy))
-			return cpufreq_add_policy_cpu(policy, cpu, dev);
+			return cpufreq_add_policy_cpu(policy, cpu);
 
 		/* This is the only online CPU for the policy.  Start over. */
 		recover_policy = true;
@@ -1328,7 +1326,7 @@ static int cpufreq_add_dev(struct device *dev, struct subsys_interface *sif)
 				     CPUFREQ_START, policy);
 
 	if (!recover_policy) {
-		ret = cpufreq_add_dev_interface(policy, dev);
+		ret = cpufreq_add_dev_interface(policy);
 		if (ret)
 			goto out_exit_policy;
 		blocking_notifier_call_chain(&cpufreq_policy_notifier_list,

From 4d1f3a5bcb489cc6f7cbc128e0c292fed7868d32 Mon Sep 17 00:00:00 2001
From: "Rafael J. Wysocki" <rafael.j.wysocki@intel.com>
Date: Mon, 27 Jul 2015 23:11:44 +0200
Subject: [PATCH 27/53] cpufreq: Do not update related_cpus on every policy
 activation

The related_cpus mask includes CPUs whose cpufreq_cpu_data per-CPU
pointers have been set the the given policy.  Since those pointers
are only set at the policy creation time and unset when the policy
is deleted, the related_cpus should not be updated between those
two operations.

For this reason, avoid updating it whenever the first of the
"related" CPUs goes online.

Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
Acked-by: Viresh Kumar <viresh.kumar@linaro.org>
---
 drivers/cpufreq/cpufreq.c | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/drivers/cpufreq/cpufreq.c b/drivers/cpufreq/cpufreq.c
index 9d8f8cd64e58..0ea4bb723760 100644
--- a/drivers/cpufreq/cpufreq.c
+++ b/drivers/cpufreq/cpufreq.c
@@ -1251,12 +1251,12 @@ static int cpufreq_add_dev(struct device *dev, struct subsys_interface *sif)
 
 	down_write(&policy->rwsem);
 
-	/* related cpus should atleast have policy->cpus */
-	cpumask_or(policy->related_cpus, policy->related_cpus, policy->cpus);
-
-	/* Remember which CPUs have been present at the policy creation time. */
-	if (!recover_policy)
+	if (!recover_policy) {
+		/* related_cpus should at least include policy->cpus. */
+		cpumask_or(policy->related_cpus, policy->related_cpus, policy->cpus);
+		/* Remember CPUs present at the policy creation time. */
 		cpumask_and(policy->real_cpus, policy->cpus, cpu_present_mask);
+	}
 
 	/*
 	 * affected cpus must always be the one, which are online. We aren't

From a34e63b14486e98cf78c99bf8ef141de7508dbc2 Mon Sep 17 00:00:00 2001
From: "Rafael J. Wysocki" <rafael.j.wysocki@intel.com>
Date: Mon, 27 Jul 2015 23:11:50 +0200
Subject: [PATCH 28/53] cpufreq: Pass CPU number to cpufreq_policy_alloc()

Change cpufreq_policy_alloc() to take a CPU number instead of a CPU
device pointer as its argument, as it is the only function called by
cpufreq_add_dev() taking a device pointer argument at this point.

That will allow us to split the CPU online part from cpufreq_add_dev()
more cleanly going forward.

Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
Acked-by: Viresh Kumar <viresh.kumar@linaro.org>
---
 drivers/cpufreq/cpufreq.c | 12 ++++++++----
 1 file changed, 8 insertions(+), 4 deletions(-)

diff --git a/drivers/cpufreq/cpufreq.c b/drivers/cpufreq/cpufreq.c
index 0ea4bb723760..0618522d4863 100644
--- a/drivers/cpufreq/cpufreq.c
+++ b/drivers/cpufreq/cpufreq.c
@@ -1090,11 +1090,15 @@ static int cpufreq_add_policy_cpu(struct cpufreq_policy *policy, unsigned int cp
 	return 0;
 }
 
-static struct cpufreq_policy *cpufreq_policy_alloc(struct device *dev)
+static struct cpufreq_policy *cpufreq_policy_alloc(unsigned int cpu)
 {
+	struct device *dev = get_cpu_device(cpu);
 	struct cpufreq_policy *policy;
 	int ret;
 
+	if (WARN_ON(!dev))
+		return NULL;
+
 	policy = kzalloc(sizeof(*policy), GFP_KERNEL);
 	if (!policy)
 		return NULL;
@@ -1122,10 +1126,10 @@ static struct cpufreq_policy *cpufreq_policy_alloc(struct device *dev)
 	init_completion(&policy->kobj_unregister);
 	INIT_WORK(&policy->update, handle_update);
 
-	policy->cpu = dev->id;
+	policy->cpu = cpu;
 
 	/* Set this once on allocation */
-	policy->kobj_cpu = dev->id;
+	policy->kobj_cpu = cpu;
 
 	return policy;
 
@@ -1233,7 +1237,7 @@ static int cpufreq_add_dev(struct device *dev, struct subsys_interface *sif)
 		up_write(&policy->rwsem);
 	} else {
 		recover_policy = false;
-		policy = cpufreq_policy_alloc(dev);
+		policy = cpufreq_policy_alloc(cpu);
 		if (!policy)
 			return -ENOMEM;
 	}

From 053819e0bf8407746cc5febf7a4947bee50377b4 Mon Sep 17 00:00:00 2001
From: Shilpasri G Bhat <shilpa.bhat@linux.vnet.ibm.com>
Date: Thu, 16 Jul 2015 13:34:18 +0530
Subject: [PATCH 29/53] cpufreq: powernv: Handle throttling due to Pmax capping
 at chip level

The On-Chip-Controller(OCC) can throttle cpu frequency by reducing the
max allowed frequency for that chip if the chip exceeds its power or
temperature limits. As Pmax capping is a chip level condition report
this throttling behavior at chip level and also do not set the global
'throttled' on Pmax capping instead set the per-chip throttled
variable. Report unthrottling if Pmax is restored after throttling.

This patch adds a structure to store chip id and throttled state of
the chip.

Signed-off-by: Shilpasri G Bhat <shilpa.bhat@linux.vnet.ibm.com>
Reviewed-by: Preeti U Murthy <preeti@linux.vnet.ibm.com>
Acked-by: Viresh Kumar <viresh.kumar@linaro.org>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 drivers/cpufreq/powernv-cpufreq.c | 59 ++++++++++++++++++++++++++++---
 1 file changed, 55 insertions(+), 4 deletions(-)

diff --git a/drivers/cpufreq/powernv-cpufreq.c b/drivers/cpufreq/powernv-cpufreq.c
index ebef0d8279c7..d0c18c9ce1ff 100644
--- a/drivers/cpufreq/powernv-cpufreq.c
+++ b/drivers/cpufreq/powernv-cpufreq.c
@@ -27,6 +27,7 @@
 #include <linux/smp.h>
 #include <linux/of.h>
 #include <linux/reboot.h>
+#include <linux/slab.h>
 
 #include <asm/cputhreads.h>
 #include <asm/firmware.h>
@@ -42,6 +43,13 @@
 static struct cpufreq_frequency_table powernv_freqs[POWERNV_MAX_PSTATES+1];
 static bool rebooting, throttled;
 
+static struct chip {
+	unsigned int id;
+	bool throttled;
+} *chips;
+
+static int nr_chips;
+
 /*
  * Note: The set of pstates consists of contiguous integers, the
  * smallest of which is indicated by powernv_pstate_info.min, the
@@ -301,22 +309,33 @@ static inline unsigned int get_nominal_index(void)
 static void powernv_cpufreq_throttle_check(unsigned int cpu)
 {
 	unsigned long pmsr;
-	int pmsr_pmax, pmsr_lp;
+	int pmsr_pmax, pmsr_lp, i;
 
 	pmsr = get_pmspr(SPRN_PMSR);
 
+	for (i = 0; i < nr_chips; i++)
+		if (chips[i].id == cpu_to_chip_id(cpu))
+			break;
+
 	/* Check for Pmax Capping */
 	pmsr_pmax = (s8)PMSR_MAX(pmsr);
 	if (pmsr_pmax != powernv_pstate_info.max) {
-		throttled = true;
-		pr_info("CPU %d Pmax is reduced to %d\n", cpu, pmsr_pmax);
-		pr_info("Max allowed Pstate is capped\n");
+		if (chips[i].throttled)
+			goto next;
+		chips[i].throttled = true;
+		pr_info("CPU %d on Chip %u has Pmax reduced to %d\n", cpu,
+			chips[i].id, pmsr_pmax);
+	} else if (chips[i].throttled) {
+		chips[i].throttled = false;
+		pr_info("CPU %d on Chip %u has Pmax restored to %d\n", cpu,
+			chips[i].id, pmsr_pmax);
 	}
 
 	/*
 	 * Check for Psafe by reading LocalPstate
 	 * or check if Psafe_mode_active is set in PMSR.
 	 */
+next:
 	pmsr_lp = (s8)PMSR_LP(pmsr);
 	if ((pmsr_lp < powernv_pstate_info.min) ||
 				(pmsr & PMSR_PSAFE_ENABLE)) {
@@ -414,6 +433,33 @@ static struct cpufreq_driver powernv_cpufreq_driver = {
 	.attr		= powernv_cpu_freq_attr,
 };
 
+static int init_chip_info(void)
+{
+	unsigned int chip[256];
+	unsigned int cpu, i;
+	unsigned int prev_chip_id = UINT_MAX;
+
+	for_each_possible_cpu(cpu) {
+		unsigned int id = cpu_to_chip_id(cpu);
+
+		if (prev_chip_id != id) {
+			prev_chip_id = id;
+			chip[nr_chips++] = id;
+		}
+	}
+
+	chips = kmalloc_array(nr_chips, sizeof(struct chip), GFP_KERNEL);
+	if (!chips)
+		return -ENOMEM;
+
+	for (i = 0; i < nr_chips; i++) {
+		chips[i].id = chip[i];
+		chips[i].throttled = false;
+	}
+
+	return 0;
+}
+
 static int __init powernv_cpufreq_init(void)
 {
 	int rc = 0;
@@ -429,6 +475,11 @@ static int __init powernv_cpufreq_init(void)
 		return rc;
 	}
 
+	/* Populate chip info */
+	rc = init_chip_info();
+	if (rc)
+		return rc;
+
 	register_reboot_notifier(&powernv_cpufreq_reboot_nb);
 	return cpufreq_register_driver(&powernv_cpufreq_driver);
 }

From 196ba2d514a13f6af1b3d78de71ce74ed2fc8bdc Mon Sep 17 00:00:00 2001
From: Shilpasri G Bhat <shilpa.bhat@linux.vnet.ibm.com>
Date: Thu, 16 Jul 2015 13:34:19 +0530
Subject: [PATCH 30/53] powerpc/powernv: Add definition of OPAL_MSG_OCC message
 type

Add OPAL_MSG_OCC message definition to opal_message_type to receive
OCC events like reset, load and throttled. Host performance can be
affected when OCC is reset or OCC throttles the max Pstate.
We can register to opal_message_notifier to receive OPAL_MSG_OCC type
of message and report it to the userspace so as to keep the user
informed about the reason for a performance drop in workloads.

The reset and load OCC events are notified to kernel when FSP sends
OCC_RESET and OCC_LOAD commands.  Both reset and load messages are
sent to kernel on successful completion of reset and load operation
respectively.

The throttle OCC event indicates that the Pmax of the chip is reduced.
The chip_id and throttle reason for reducing Pmax is also queued along
with the message.

Signed-off-by: Shilpasri G Bhat <shilpa.bhat@linux.vnet.ibm.com>
Acked-by: Viresh Kumar <viresh.kumar@linaro.org>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 arch/powerpc/include/asm/opal-api.h | 12 ++++++++++++
 1 file changed, 12 insertions(+)

diff --git a/arch/powerpc/include/asm/opal-api.h b/arch/powerpc/include/asm/opal-api.h
index e9e4c52f3685..64dc9f547fb6 100644
--- a/arch/powerpc/include/asm/opal-api.h
+++ b/arch/powerpc/include/asm/opal-api.h
@@ -361,6 +361,7 @@ enum opal_msg_type {
 	OPAL_MSG_HMI_EVT,
 	OPAL_MSG_DPO,
 	OPAL_MSG_PRD,
+	OPAL_MSG_OCC,
 	OPAL_MSG_TYPE_MAX,
 };
 
@@ -700,6 +701,17 @@ struct opal_prd_msg_header {
 
 struct opal_prd_msg;
 
+#define OCC_RESET                       0
+#define OCC_LOAD                        1
+#define OCC_THROTTLE                    2
+#define OCC_MAX_THROTTLE_STATUS         5
+
+struct opal_occ_msg {
+	__be64 type;
+	__be64 chip;
+	__be64 throttle_status;
+};
+
 /*
  * SG entries
  *

From cb166fa937a2fbc14badcafca86202354c34a213 Mon Sep 17 00:00:00 2001
From: Shilpasri G Bhat <shilpa.bhat@linux.vnet.ibm.com>
Date: Thu, 16 Jul 2015 13:34:20 +0530
Subject: [PATCH 31/53] cpufreq: powernv: Register for OCC related opal_message
 notification

OCC is an On-Chip-Controller which takes care of power and thermal
safety of the chip. During runtime due to power failure or
overtemperature the OCC may throttle the frequencies of the CPUs to
remain within the power budget.

We want the cpufreq driver to be aware of such situations to be able
to report the reason to the user. We register to opal_message_notifier
to receive OCC messages from opal.

powernv_cpufreq_throttle_check() reports any frequency throttling and
this patch will report the reason or event that caused throttling. We
can be throttled if OCC is reset or OCC limits Pmax due to power or
thermal reasons. We are also notified of unthrottling after an OCC
reset or if OCC restores Pmax on the chip.

Signed-off-by: Shilpasri G Bhat <shilpa.bhat@linux.vnet.ibm.com>
Acked-by: Viresh Kumar <viresh.kumar@linaro.org>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 drivers/cpufreq/powernv-cpufreq.c | 74 ++++++++++++++++++++++++++++++-
 1 file changed, 73 insertions(+), 1 deletion(-)

diff --git a/drivers/cpufreq/powernv-cpufreq.c b/drivers/cpufreq/powernv-cpufreq.c
index d0c18c9ce1ff..a634199052b6 100644
--- a/drivers/cpufreq/powernv-cpufreq.c
+++ b/drivers/cpufreq/powernv-cpufreq.c
@@ -33,6 +33,7 @@
 #include <asm/firmware.h>
 #include <asm/reg.h>
 #include <asm/smp.h> /* Required for cpu_sibling_mask() in UP configs */
+#include <asm/opal.h>
 
 #define POWERNV_MAX_PSTATES	256
 #define PMSR_PSAFE_ENABLE	(1UL << 30)
@@ -41,7 +42,7 @@
 #define PMSR_LP(x)		((x >> 48) & 0xFF)
 
 static struct cpufreq_frequency_table powernv_freqs[POWERNV_MAX_PSTATES+1];
-static bool rebooting, throttled;
+static bool rebooting, throttled, occ_reset;
 
 static struct chip {
 	unsigned int id;
@@ -414,6 +415,74 @@ static struct notifier_block powernv_cpufreq_reboot_nb = {
 	.notifier_call = powernv_cpufreq_reboot_notifier,
 };
 
+static char throttle_reason[][30] = {
+					"No throttling",
+					"Power Cap",
+					"Processor Over Temperature",
+					"Power Supply Failure",
+					"Over Current",
+					"OCC Reset"
+				     };
+
+static int powernv_cpufreq_occ_msg(struct notifier_block *nb,
+				   unsigned long msg_type, void *_msg)
+{
+	struct opal_msg *msg = _msg;
+	struct opal_occ_msg omsg;
+
+	if (msg_type != OPAL_MSG_OCC)
+		return 0;
+
+	omsg.type = be64_to_cpu(msg->params[0]);
+
+	switch (omsg.type) {
+	case OCC_RESET:
+		occ_reset = true;
+		/*
+		 * powernv_cpufreq_throttle_check() is called in
+		 * target() callback which can detect the throttle state
+		 * for governors like ondemand.
+		 * But static governors will not call target() often thus
+		 * report throttling here.
+		 */
+		if (!throttled) {
+			throttled = true;
+			pr_crit("CPU Frequency is throttled\n");
+		}
+		pr_info("OCC: Reset\n");
+		break;
+	case OCC_LOAD:
+		pr_info("OCC: Loaded\n");
+		break;
+	case OCC_THROTTLE:
+		omsg.chip = be64_to_cpu(msg->params[1]);
+		omsg.throttle_status = be64_to_cpu(msg->params[2]);
+
+		if (occ_reset) {
+			occ_reset = false;
+			throttled = false;
+			pr_info("OCC: Active\n");
+			return 0;
+		}
+
+		if (omsg.throttle_status &&
+		    omsg.throttle_status <= OCC_MAX_THROTTLE_STATUS)
+			pr_info("OCC: Chip %u Pmax reduced due to %s\n",
+				(unsigned int)omsg.chip,
+				throttle_reason[omsg.throttle_status]);
+		else if (!omsg.throttle_status)
+			pr_info("OCC: Chip %u %s\n", (unsigned int)omsg.chip,
+				throttle_reason[omsg.throttle_status]);
+	}
+	return 0;
+}
+
+static struct notifier_block powernv_cpufreq_opal_nb = {
+	.notifier_call	= powernv_cpufreq_occ_msg,
+	.next		= NULL,
+	.priority	= 0,
+};
+
 static void powernv_cpufreq_stop_cpu(struct cpufreq_policy *policy)
 {
 	struct powernv_smp_call_data freq_data;
@@ -481,6 +550,7 @@ static int __init powernv_cpufreq_init(void)
 		return rc;
 
 	register_reboot_notifier(&powernv_cpufreq_reboot_nb);
+	opal_message_notifier_register(OPAL_MSG_OCC, &powernv_cpufreq_opal_nb);
 	return cpufreq_register_driver(&powernv_cpufreq_driver);
 }
 module_init(powernv_cpufreq_init);
@@ -488,6 +558,8 @@ module_init(powernv_cpufreq_init);
 static void __exit powernv_cpufreq_exit(void)
 {
 	unregister_reboot_notifier(&powernv_cpufreq_reboot_nb);
+	opal_message_notifier_unregister(OPAL_MSG_OCC,
+					 &powernv_cpufreq_opal_nb);
 	cpufreq_unregister_driver(&powernv_cpufreq_driver);
 }
 module_exit(powernv_cpufreq_exit);

From 735366fc407755626058218fc8d0430735a669ac Mon Sep 17 00:00:00 2001
From: Shilpasri G Bhat <shilpa.bhat@linux.vnet.ibm.com>
Date: Thu, 16 Jul 2015 13:34:21 +0530
Subject: [PATCH 32/53] cpufreq: powernv: Call throttle_check() on receiving
 OCC_THROTTLE

Re-evaluate the chip's throttled state on recieving OCC_THROTTLE
notification by executing *throttle_check() on any one of the cpu on
the chip. This is a sanity check to verify if we were indeed
throttled/unthrottled after receiving OCC_THROTTLE notification.

We cannot call *throttle_check() directly from the notification
handler because we could be handling chip1's notification in chip2. So
initiate an smp_call to execute *throttle_check(). We are irq-disabled
in the notification handler, so use a worker thread to smp_call
throttle_check() on any of the cpu in the chipmask.

Signed-off-by: Shilpasri G Bhat <shilpa.bhat@linux.vnet.ibm.com>
Acked-by: Viresh Kumar <viresh.kumar@linaro.org>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 drivers/cpufreq/powernv-cpufreq.c | 28 ++++++++++++++++++++++++++--
 1 file changed, 26 insertions(+), 2 deletions(-)

diff --git a/drivers/cpufreq/powernv-cpufreq.c b/drivers/cpufreq/powernv-cpufreq.c
index a634199052b6..22f33ff2d77a 100644
--- a/drivers/cpufreq/powernv-cpufreq.c
+++ b/drivers/cpufreq/powernv-cpufreq.c
@@ -47,6 +47,8 @@ static bool rebooting, throttled, occ_reset;
 static struct chip {
 	unsigned int id;
 	bool throttled;
+	cpumask_t mask;
+	struct work_struct throttle;
 } *chips;
 
 static int nr_chips;
@@ -307,8 +309,9 @@ static inline unsigned int get_nominal_index(void)
 	return powernv_pstate_info.max - powernv_pstate_info.nominal;
 }
 
-static void powernv_cpufreq_throttle_check(unsigned int cpu)
+static void powernv_cpufreq_throttle_check(void *data)
 {
+	unsigned int cpu = smp_processor_id();
 	unsigned long pmsr;
 	int pmsr_pmax, pmsr_lp, i;
 
@@ -370,7 +373,7 @@ static int powernv_cpufreq_target_index(struct cpufreq_policy *policy,
 		return 0;
 
 	if (!throttled)
-		powernv_cpufreq_throttle_check(smp_processor_id());
+		powernv_cpufreq_throttle_check(NULL);
 
 	freq_data.pstate_id = powernv_freqs[new_index].driver_data;
 
@@ -415,6 +418,14 @@ static struct notifier_block powernv_cpufreq_reboot_nb = {
 	.notifier_call = powernv_cpufreq_reboot_notifier,
 };
 
+void powernv_cpufreq_work_fn(struct work_struct *work)
+{
+	struct chip *chip = container_of(work, struct chip, throttle);
+
+	smp_call_function_any(&chip->mask,
+			      powernv_cpufreq_throttle_check, NULL, 0);
+}
+
 static char throttle_reason[][30] = {
 					"No throttling",
 					"Power Cap",
@@ -429,6 +440,7 @@ static int powernv_cpufreq_occ_msg(struct notifier_block *nb,
 {
 	struct opal_msg *msg = _msg;
 	struct opal_occ_msg omsg;
+	int i;
 
 	if (msg_type != OPAL_MSG_OCC)
 		return 0;
@@ -462,6 +474,10 @@ static int powernv_cpufreq_occ_msg(struct notifier_block *nb,
 			occ_reset = false;
 			throttled = false;
 			pr_info("OCC: Active\n");
+
+			for (i = 0; i < nr_chips; i++)
+				schedule_work(&chips[i].throttle);
+
 			return 0;
 		}
 
@@ -473,6 +489,12 @@ static int powernv_cpufreq_occ_msg(struct notifier_block *nb,
 		else if (!omsg.throttle_status)
 			pr_info("OCC: Chip %u %s\n", (unsigned int)omsg.chip,
 				throttle_reason[omsg.throttle_status]);
+		else
+			return 0;
+
+		for (i = 0; i < nr_chips; i++)
+			if (chips[i].id == omsg.chip)
+				schedule_work(&chips[i].throttle);
 	}
 	return 0;
 }
@@ -524,6 +546,8 @@ static int init_chip_info(void)
 	for (i = 0; i < nr_chips; i++) {
 		chips[i].id = chip[i];
 		chips[i].throttled = false;
+		cpumask_copy(&chips[i].mask, cpumask_of_node(chip[i]));
+		INIT_WORK(&chips[i].throttle, powernv_cpufreq_work_fn);
 	}
 
 	return 0;

From 3dd3ebe5bb3837aeac28a23f8f22b97cb84abab6 Mon Sep 17 00:00:00 2001
From: Shilpasri G Bhat <shilpa.bhat@linux.vnet.ibm.com>
Date: Thu, 16 Jul 2015 13:34:22 +0530
Subject: [PATCH 33/53] cpufreq: powernv: Report Psafe only if
 PMSR.psafe_mode_active bit is set

On a reset cycle of OCC, although the system retires from safe
frequency state the local pstate is not restored to Pmin or last
requested pstate. Now if the cpufreq governor initiates a pstate
change, the local pstate will be in Psafe and we will be reporting a
false positive when we are not throttled.

So in powernv_cpufreq_throttle_check() remove the condition which
checks if local pstate is less than Pmin while checking for Psafe
frequency. If the cpus are forced to Psafe then PMSR.psafe_mode_active
bit will be set. So, when OCCs become active this bit will be cleared.
Let us just rely on this bit for reporting throttling.

Signed-off-by: Shilpasri G Bhat <shilpa.bhat@linux.vnet.ibm.com>
Reviewed-by: Preeti U Murthy <preeti@linux.vnet.ibm.com>
Acked-by: Viresh Kumar <viresh.kumar@linaro.org>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 drivers/cpufreq/powernv-cpufreq.c | 12 +++---------
 1 file changed, 3 insertions(+), 9 deletions(-)

diff --git a/drivers/cpufreq/powernv-cpufreq.c b/drivers/cpufreq/powernv-cpufreq.c
index 22f33ff2d77a..90b42938837e 100644
--- a/drivers/cpufreq/powernv-cpufreq.c
+++ b/drivers/cpufreq/powernv-cpufreq.c
@@ -39,7 +39,6 @@
 #define PMSR_PSAFE_ENABLE	(1UL << 30)
 #define PMSR_SPR_EM_DISABLE	(1UL << 31)
 #define PMSR_MAX(x)		((x >> 32) & 0xFF)
-#define PMSR_LP(x)		((x >> 48) & 0xFF)
 
 static struct cpufreq_frequency_table powernv_freqs[POWERNV_MAX_PSTATES+1];
 static bool rebooting, throttled, occ_reset;
@@ -313,7 +312,7 @@ static void powernv_cpufreq_throttle_check(void *data)
 {
 	unsigned int cpu = smp_processor_id();
 	unsigned long pmsr;
-	int pmsr_pmax, pmsr_lp, i;
+	int pmsr_pmax, i;
 
 	pmsr = get_pmspr(SPRN_PMSR);
 
@@ -335,14 +334,9 @@ static void powernv_cpufreq_throttle_check(void *data)
 			chips[i].id, pmsr_pmax);
 	}
 
-	/*
-	 * Check for Psafe by reading LocalPstate
-	 * or check if Psafe_mode_active is set in PMSR.
-	 */
+	/* Check if Psafe_mode_active is set in PMSR. */
 next:
-	pmsr_lp = (s8)PMSR_LP(pmsr);
-	if ((pmsr_lp < powernv_pstate_info.min) ||
-				(pmsr & PMSR_PSAFE_ENABLE)) {
+	if (pmsr & PMSR_PSAFE_ENABLE) {
 		throttled = true;
 		pr_info("Pstate set to safe frequency\n");
 	}

From 227942809b52f23cda414858b635c0285f11de00 Mon Sep 17 00:00:00 2001
From: Shilpasri G Bhat <shilpa.bhat@linux.vnet.ibm.com>
Date: Thu, 16 Jul 2015 13:34:23 +0530
Subject: [PATCH 34/53] cpufreq: powernv: Restore cpu frequency to policy->cur
 on unthrottling

If frequency is throttled due to OCC reset then cpus will be in Psafe
frequency, so restore the frequency on all cpus to policy->cur when
OCCs are active again. And if frequency is throttled due to Pmax
capping then restore the frequency of all the cpus  in the chip on
unthrottling.

Signed-off-by: Shilpasri G Bhat <shilpa.bhat@linux.vnet.ibm.com>
Acked-by: Viresh Kumar <viresh.kumar@linaro.org>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 drivers/cpufreq/powernv-cpufreq.c | 31 +++++++++++++++++++++++++++++--
 1 file changed, 29 insertions(+), 2 deletions(-)

diff --git a/drivers/cpufreq/powernv-cpufreq.c b/drivers/cpufreq/powernv-cpufreq.c
index 90b42938837e..546e056e416d 100644
--- a/drivers/cpufreq/powernv-cpufreq.c
+++ b/drivers/cpufreq/powernv-cpufreq.c
@@ -48,6 +48,7 @@ static struct chip {
 	bool throttled;
 	cpumask_t mask;
 	struct work_struct throttle;
+	bool restore;
 } *chips;
 
 static int nr_chips;
@@ -415,9 +416,29 @@ static struct notifier_block powernv_cpufreq_reboot_nb = {
 void powernv_cpufreq_work_fn(struct work_struct *work)
 {
 	struct chip *chip = container_of(work, struct chip, throttle);
+	unsigned int cpu;
+	cpumask_var_t mask;
 
 	smp_call_function_any(&chip->mask,
 			      powernv_cpufreq_throttle_check, NULL, 0);
+
+	if (!chip->restore)
+		return;
+
+	chip->restore = false;
+	cpumask_copy(mask, &chip->mask);
+	for_each_cpu_and(cpu, mask, cpu_online_mask) {
+		int index, tcpu;
+		struct cpufreq_policy policy;
+
+		cpufreq_get_policy(&policy, cpu);
+		cpufreq_frequency_table_target(&policy, policy.freq_table,
+					       policy.cur,
+					       CPUFREQ_RELATION_C, &index);
+		powernv_cpufreq_target_index(&policy, index);
+		for_each_cpu(tcpu, policy.cpus)
+			cpumask_clear_cpu(tcpu, mask);
+	}
 }
 
 static char throttle_reason[][30] = {
@@ -469,8 +490,10 @@ static int powernv_cpufreq_occ_msg(struct notifier_block *nb,
 			throttled = false;
 			pr_info("OCC: Active\n");
 
-			for (i = 0; i < nr_chips; i++)
+			for (i = 0; i < nr_chips; i++) {
+				chips[i].restore = true;
 				schedule_work(&chips[i].throttle);
+			}
 
 			return 0;
 		}
@@ -487,8 +510,11 @@ static int powernv_cpufreq_occ_msg(struct notifier_block *nb,
 			return 0;
 
 		for (i = 0; i < nr_chips; i++)
-			if (chips[i].id == omsg.chip)
+			if (chips[i].id == omsg.chip) {
+				if (!omsg.throttle_status)
+					chips[i].restore = true;
 				schedule_work(&chips[i].throttle);
+			}
 	}
 	return 0;
 }
@@ -542,6 +568,7 @@ static int init_chip_info(void)
 		chips[i].throttled = false;
 		cpumask_copy(&chips[i].mask, cpumask_of_node(chip[i]));
 		INIT_WORK(&chips[i].throttle, powernv_cpufreq_work_fn);
+		chips[i].restore = false;
 	}
 
 	return 0;

From 0b275352872b2641ed5c94d0f0f8c7e907bf3e3f Mon Sep 17 00:00:00 2001
From: "Rafael J. Wysocki" <rafael.j.wysocki@intel.com>
Date: Wed, 29 Jul 2015 03:03:44 +0200
Subject: [PATCH 35/53] cpufreq: Separate CPU device registration from CPU
 online

To separate the CPU online interface from the CPU device
registration, split cpufreq_online() out of cpufreq_add_dev()
and make cpufreq_cpu_callback() call the former, while
cpufreq_add_dev() itself will only be used as the CPU device
addition subsystem interface callback.

Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
Suggested-by: Russell King <linux@arm.linux.org.uk>
Acked-by: Viresh Kumar <viresh.kumar@linaro.org>
---
 drivers/cpufreq/cpufreq.c | 90 ++++++++++++++++++++-------------------
 1 file changed, 47 insertions(+), 43 deletions(-)

diff --git a/drivers/cpufreq/cpufreq.c b/drivers/cpufreq/cpufreq.c
index 0618522d4863..0d46b557c016 100644
--- a/drivers/cpufreq/cpufreq.c
+++ b/drivers/cpufreq/cpufreq.c
@@ -1191,36 +1191,15 @@ static void cpufreq_policy_free(struct cpufreq_policy *policy, bool notify)
 	kfree(policy);
 }
 
-/**
- * cpufreq_add_dev - add a CPU device
- *
- * Adds the cpufreq interface for a CPU device.
- *
- * The Oracle says: try running cpufreq registration/unregistration concurrently
- * with with cpu hotplugging and all hell will break loose. Tried to clean this
- * mess up, but more thorough testing is needed. - Mathieu
- */
-static int cpufreq_add_dev(struct device *dev, struct subsys_interface *sif)
+static int cpufreq_online(unsigned int cpu)
 {
-	unsigned int j, cpu = dev->id;
-	int ret;
 	struct cpufreq_policy *policy;
-	unsigned long flags;
 	bool recover_policy;
+	unsigned long flags;
+	unsigned int j;
+	int ret;
 
-	pr_debug("adding CPU %u\n", cpu);
-
-	if (cpu_is_offline(cpu)) {
-		/*
-		 * Only possible if we are here from the subsys_interface add
-		 * callback.  A hotplug notifier will follow and we will handle
-		 * it as CPU online then.  For now, just create the sysfs link,
-		 * unless there is no policy or the link is already present.
-		 */
-		policy = per_cpu(cpufreq_cpu_data, cpu);
-		return policy && !cpumask_test_and_set_cpu(cpu, policy->real_cpus)
-			? add_cpu_dev_symlink(policy, cpu) : 0;
-	}
+	pr_debug("%s: bringing CPU%u online\n", __func__, cpu);
 
 	/* Check if this CPU already has a policy to manage it */
 	policy = per_cpu(cpufreq_cpu_data, cpu);
@@ -1377,6 +1356,35 @@ out_free_policy:
 	return ret;
 }
 
+/**
+ * cpufreq_add_dev - the cpufreq interface for a CPU device.
+ * @dev: CPU device.
+ * @sif: Subsystem interface structure pointer (not used)
+ */
+static int cpufreq_add_dev(struct device *dev, struct subsys_interface *sif)
+{
+	unsigned cpu = dev->id;
+	int ret;
+
+	dev_dbg(dev, "%s: adding CPU%u\n", __func__, cpu);
+
+	if (cpu_online(cpu)) {
+		ret = cpufreq_online(cpu);
+	} else {
+		/*
+		 * A hotplug notifier will follow and we will handle it as CPU
+		 * online then.  For now, just create the sysfs link, unless
+		 * there is no policy or the link is already present.
+		 */
+		struct cpufreq_policy *policy = per_cpu(cpufreq_cpu_data, cpu);
+
+		ret = policy && !cpumask_test_and_set_cpu(cpu, policy->real_cpus)
+			? add_cpu_dev_symlink(policy, cpu) : 0;
+	}
+
+	return ret;
+}
+
 static void cpufreq_offline_prepare(unsigned int cpu)
 {
 	struct cpufreq_policy *policy;
@@ -2340,27 +2348,23 @@ static int cpufreq_cpu_callback(struct notifier_block *nfb,
 					unsigned long action, void *hcpu)
 {
 	unsigned int cpu = (unsigned long)hcpu;
-	struct device *dev;
 
-	dev = get_cpu_device(cpu);
-	if (dev) {
-		switch (action & ~CPU_TASKS_FROZEN) {
-		case CPU_ONLINE:
-			cpufreq_add_dev(dev, NULL);
-			break;
+	switch (action & ~CPU_TASKS_FROZEN) {
+	case CPU_ONLINE:
+		cpufreq_online(cpu);
+		break;
 
-		case CPU_DOWN_PREPARE:
-			cpufreq_offline_prepare(cpu);
-			break;
+	case CPU_DOWN_PREPARE:
+		cpufreq_offline_prepare(cpu);
+		break;
 
-		case CPU_POST_DEAD:
-			cpufreq_offline_finish(cpu);
-			break;
+	case CPU_POST_DEAD:
+		cpufreq_offline_finish(cpu);
+		break;
 
-		case CPU_DOWN_FAILED:
-			cpufreq_add_dev(dev, NULL);
-			break;
-		}
+	case CPU_DOWN_FAILED:
+		cpufreq_online(cpu);
+		break;
 	}
 	return NOTIFY_OK;
 }

From 194d99c7e3ce54a8c7d8adf79fb1d8ad49a9bf9f Mon Sep 17 00:00:00 2001
From: "Rafael J. Wysocki" <rafael.j.wysocki@intel.com>
Date: Wed, 29 Jul 2015 03:08:57 +0200
Subject: [PATCH 36/53] cpufreq: Replace recover_policy with new_policy in
 cpufreq_online()

The recover_policy is unsed in cpufreq_online() to indicate whether
a new policy object is created or an existing one is reinitialized.

The "recover" part of the name is slightly confusing (it should be
"reinitialization" rather than "recovery") and the logical not (!)
operator is applied to it in almost all of the checks it is used in,
so replace that variable with a new one called "new_policy" that
will be true in the case of a new policy creation.

While at it, drop one of the labels that is jumped to from only
one spot.

Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
Acked-by: Viresh Kumar <viresh.kumar@linaro.org>
---
 drivers/cpufreq/cpufreq.c | 23 +++++++++++------------
 1 file changed, 11 insertions(+), 12 deletions(-)

diff --git a/drivers/cpufreq/cpufreq.c b/drivers/cpufreq/cpufreq.c
index 0d46b557c016..24e4ba568e77 100644
--- a/drivers/cpufreq/cpufreq.c
+++ b/drivers/cpufreq/cpufreq.c
@@ -1194,7 +1194,7 @@ static void cpufreq_policy_free(struct cpufreq_policy *policy, bool notify)
 static int cpufreq_online(unsigned int cpu)
 {
 	struct cpufreq_policy *policy;
-	bool recover_policy;
+	bool new_policy;
 	unsigned long flags;
 	unsigned int j;
 	int ret;
@@ -1209,13 +1209,13 @@ static int cpufreq_online(unsigned int cpu)
 			return cpufreq_add_policy_cpu(policy, cpu);
 
 		/* This is the only online CPU for the policy.  Start over. */
-		recover_policy = true;
+		new_policy = false;
 		down_write(&policy->rwsem);
 		policy->cpu = cpu;
 		policy->governor = NULL;
 		up_write(&policy->rwsem);
 	} else {
-		recover_policy = false;
+		new_policy = true;
 		policy = cpufreq_policy_alloc(cpu);
 		if (!policy)
 			return -ENOMEM;
@@ -1234,7 +1234,7 @@ static int cpufreq_online(unsigned int cpu)
 
 	down_write(&policy->rwsem);
 
-	if (!recover_policy) {
+	if (new_policy) {
 		/* related_cpus should at least include policy->cpus. */
 		cpumask_or(policy->related_cpus, policy->related_cpus, policy->cpus);
 		/* Remember CPUs present at the policy creation time. */
@@ -1247,7 +1247,7 @@ static int cpufreq_online(unsigned int cpu)
 	 */
 	cpumask_and(policy->cpus, policy->cpus, cpu_online_mask);
 
-	if (!recover_policy) {
+	if (new_policy) {
 		policy->user_policy.min = policy->min;
 		policy->user_policy.max = policy->max;
 
@@ -1308,7 +1308,7 @@ static int cpufreq_online(unsigned int cpu)
 	blocking_notifier_call_chain(&cpufreq_policy_notifier_list,
 				     CPUFREQ_START, policy);
 
-	if (!recover_policy) {
+	if (new_policy) {
 		ret = cpufreq_add_dev_interface(policy);
 		if (ret)
 			goto out_exit_policy;
@@ -1324,10 +1324,12 @@ static int cpufreq_online(unsigned int cpu)
 	if (ret) {
 		pr_err("%s: Failed to initialize policy for cpu: %d (%d)\n",
 		       __func__, cpu, ret);
-		goto out_remove_policy_notify;
+		/* cpufreq_policy_free() will notify based on this */
+		new_policy = false;
+		goto out_exit_policy;
 	}
 
-	if (!recover_policy) {
+	if (new_policy) {
 		policy->user_policy.policy = policy->policy;
 		policy->user_policy.governor = policy->governor;
 	}
@@ -1343,16 +1345,13 @@ static int cpufreq_online(unsigned int cpu)
 
 	return 0;
 
-out_remove_policy_notify:
-	/* cpufreq_policy_free() will notify based on this */
-	recover_policy = true;
 out_exit_policy:
 	up_write(&policy->rwsem);
 
 	if (cpufreq_driver->exit)
 		cpufreq_driver->exit(policy);
 out_free_policy:
-	cpufreq_policy_free(policy, recover_policy);
+	cpufreq_policy_free(policy, !new_policy);
 	return ret;
 }
 

From fdd320da84c63092fe6e155cb7b8d80f7f765fa9 Mon Sep 17 00:00:00 2001
From: "Rafael J. Wysocki" <rafael.j.wysocki@intel.com>
Date: Thu, 30 Jul 2015 01:45:07 +0200
Subject: [PATCH 37/53] cpufreq: Lock CPU online/offline in
 cpufreq_register_driver()

To protect against races with concurrent CPU online/offline, call
get_online_cpus() before registering a cpufreq driver.

Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
Acked-by: Viresh Kumar <viresh.kumar@linaro.org>
---
 drivers/cpufreq/cpufreq.c | 13 ++++++++++---
 1 file changed, 10 insertions(+), 3 deletions(-)

diff --git a/drivers/cpufreq/cpufreq.c b/drivers/cpufreq/cpufreq.c
index 24e4ba568e77..0f4e96ff99e8 100644
--- a/drivers/cpufreq/cpufreq.c
+++ b/drivers/cpufreq/cpufreq.c
@@ -2471,10 +2471,14 @@ int cpufreq_register_driver(struct cpufreq_driver *driver_data)
 
 	pr_debug("trying to register driver %s\n", driver_data->name);
 
+	/* Protect against concurrent CPU online/offline. */
+	get_online_cpus();
+
 	write_lock_irqsave(&cpufreq_driver_lock, flags);
 	if (cpufreq_driver) {
 		write_unlock_irqrestore(&cpufreq_driver_lock, flags);
-		return -EEXIST;
+		ret = -EEXIST;
+		goto out;
 	}
 	cpufreq_driver = driver_data;
 	write_unlock_irqrestore(&cpufreq_driver_lock, flags);
@@ -2513,7 +2517,10 @@ int cpufreq_register_driver(struct cpufreq_driver *driver_data)
 	register_hotcpu_notifier(&cpufreq_cpu_notifier);
 	pr_debug("driver %s up and running\n", driver_data->name);
 
-	return 0;
+out:
+	put_online_cpus();
+	return ret;
+
 err_if_unreg:
 	subsys_interface_unregister(&cpufreq_interface);
 err_boost_unreg:
@@ -2523,7 +2530,7 @@ err_null_driver:
 	write_lock_irqsave(&cpufreq_driver_lock, flags);
 	cpufreq_driver = NULL;
 	write_unlock_irqrestore(&cpufreq_driver_lock, flags);
-	return ret;
+	goto out;
 }
 EXPORT_SYMBOL_GPL(cpufreq_register_driver);
 

From fba9573b33f8bddd772195c202f6b8ec0751cedd Mon Sep 17 00:00:00 2001
From: Pan Xinhui <xinhuix.pan@intel.com>
Date: Thu, 30 Jul 2015 18:10:40 +0800
Subject: [PATCH 38/53] cpufreq: Correct a freq check in cpufreq_set_policy()

This check was originally added by commit 9c9a43ed2734 ("[CPUFREQ]
return error when failing to set minfreq").It attempt to return an error
on obviously incorrect limits when we echo xxx >.../scaling_max,min_freq
Actually we just need check if new_policy->min > new_policy->max.
Because at least one of max/min is copied from cpufreq_get_policy().

For example, when we echo xxx > .../scaling_min_freq, new_policy is
copied from policy in cpufreq_get_policy. new_policy->max is same with
policy->max. new_policy->min is set to a new value.

Let me explain it in deduction method, first statement in if ():
new_policy->min > policy->max
policy->max == new_policy->max
==> new_policy->min > new_policy->max

second statement in if():
new_policy->max < policy->min
policy->max < policy->min
==>new_policy->min > new_policy->max (induction method)

So we have proved that we only need check if new_policy->min >
new_policy->max.

After apply this patch, we can also modify ->min and ->max at same time
if new freq range is very much different from current freq range. For
example, if current freq range is 480000-960000, then we want to set
this range to 1120000-2240000, we would fail in the past because
new_policy->min > policy->max. As long as the cpufreq range is valid, we
has no reason to reject the user. So correct the check to avoid such
case.

Signed-off-by: Pan Xinhui <xinhuix.pan@intel.com>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 drivers/cpufreq/cpufreq.c | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/drivers/cpufreq/cpufreq.c b/drivers/cpufreq/cpufreq.c
index 0f4e96ff99e8..76a26609d96b 100644
--- a/drivers/cpufreq/cpufreq.c
+++ b/drivers/cpufreq/cpufreq.c
@@ -2190,7 +2190,11 @@ static int cpufreq_set_policy(struct cpufreq_policy *policy,
 
 	memcpy(&new_policy->cpuinfo, &policy->cpuinfo, sizeof(policy->cpuinfo));
 
-	if (new_policy->min > policy->max || new_policy->max < policy->min)
+	/*
+	* This check works well when we store new min/max freq attributes,
+	* because new_policy is a copy of policy with one field updated.
+	*/
+	if (new_policy->min > new_policy->max)
 		return -EINVAL;
 
 	/* verify the cpu speed can be set within this limit */

From 144c8e172b5c388ddf41fa64e154f53384ec3448 Mon Sep 17 00:00:00 2001
From: Chen Yu <yu.c.chen@intel.com>
Date: Wed, 29 Jul 2015 23:53:10 +0800
Subject: [PATCH 39/53] intel_pstate: Fix possible overflow complained by
 Coverity

Coverity scanning performed on intel_pstate.c shows possible
overflow when doing left shifting:
val = pstate << 8;
since pstate is of type integer, while val is of u64, left shifting
pstate might lead to potential loss of upper bits. Say, if pstate equals
0x4000 0000, after pstate << 8 we will get zero assigned to val.
Although pstate will not likely be that big, this patch cast the left
operand to u64 before performing the left shift, to avoid complaining
from Coverity.

Reported-by: Coquard, Christophe <christophe.coquard@intel.com>
Signed-off-by: Chen Yu <yu.c.chen@intel.com>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 drivers/cpufreq/intel_pstate.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/cpufreq/intel_pstate.c b/drivers/cpufreq/intel_pstate.c
index 7b2721fb861f..b9354b6f7149 100644
--- a/drivers/cpufreq/intel_pstate.c
+++ b/drivers/cpufreq/intel_pstate.c
@@ -521,7 +521,7 @@ static void byt_set_pstate(struct cpudata *cpudata, int pstate)
 	int32_t vid_fp;
 	u32 vid;
 
-	val = pstate << 8;
+	val = (u64)pstate << 8;
 	if (limits.no_turbo && !limits.turbo_disabled)
 		val |= (u64)1 << 32;
 
@@ -610,7 +610,7 @@ static void core_set_pstate(struct cpudata *cpudata, int pstate)
 {
 	u64 val;
 
-	val = pstate << 8;
+	val = (u64)pstate << 8;
 	if (limits.no_turbo && !limits.turbo_disabled)
 		val |= (u64)1 << 32;
 

From 1c9391238753ffb370f02743b5f43bac6dea304b Mon Sep 17 00:00:00 2001
From: Kristen Carlson Accardi <kristen@linux.intel.com>
Date: Wed, 5 Aug 2015 12:47:14 -0700
Subject: [PATCH 40/53] intel_pstate: Add SKY-S support

Whitelist the SKL-S processor

Signed-off-by: Kristen Carlson Accardi <kristen@linux.intel.com>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 drivers/cpufreq/intel_pstate.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/cpufreq/intel_pstate.c b/drivers/cpufreq/intel_pstate.c
index b9354b6f7149..f05f0633b2a9 100644
--- a/drivers/cpufreq/intel_pstate.c
+++ b/drivers/cpufreq/intel_pstate.c
@@ -908,6 +908,7 @@ static const struct x86_cpu_id intel_pstate_cpu_ids[] = {
 	ICPU(0x4c, byt_params),
 	ICPU(0x4e, core_params),
 	ICPU(0x4f, core_params),
+	ICPU(0x5e, core_params),
 	ICPU(0x56, core_params),
 	ICPU(0x57, knl_params),
 	{}

From 5aecc3c8a24a7c2db29ec4a927bbb08befcb508e Mon Sep 17 00:00:00 2001
From: Ethan Zhao <ethan.zhao@oracle.com>
Date: Wed, 5 Aug 2015 09:28:50 +0900
Subject: [PATCH 41/53] intel_pstate: append more Oracle OEM table id to vendor
 bypass list

Append more Oracle X86 servers that have their own power management,

SUN FIRE X4275 M3
SUN FIRE X4170 M3
and
SUN FIRE X6-2

Signed-off-by: Ethan Zhao <ethan.zhao@oracle.com>
Acked-by: Viresh Kumar <viresh.kumar@linaro.org>
Acked-by:  Kristen Carlson Accardi <kristen@linux.intel.com>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 drivers/cpufreq/intel_pstate.c | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/drivers/cpufreq/intel_pstate.c b/drivers/cpufreq/intel_pstate.c
index f05f0633b2a9..31d0548638e8 100644
--- a/drivers/cpufreq/intel_pstate.c
+++ b/drivers/cpufreq/intel_pstate.c
@@ -1174,6 +1174,10 @@ static struct hw_vendor_info vendor_info[] = {
 	{1, "ORACLE", "X4270M3 ", PPC},
 	{1, "ORACLE", "X4270M2 ", PPC},
 	{1, "ORACLE", "X4170M2 ", PPC},
+	{1, "ORACLE", "X4170 M3", PPC},
+	{1, "ORACLE", "X4275 M3", PPC},
+	{1, "ORACLE", "X6-2    ", PPC},
+	{1, "ORACLE", "Sudbury ", PPC},
 	{0, "", ""},
 };
 

From c9c96ae2c57d91ea2b73ef447fdd44c760a96d97 Mon Sep 17 00:00:00 2001
From: Pi-Cheng Chen <pi-cheng.chen@linaro.org>
Date: Mon, 17 Aug 2015 17:24:23 +0800
Subject: [PATCH 42/53] dt-bindings: mediatek: Add MT8173 CPU DVFS clock
 bindings

This patch adds the clock and regulator consumer properties part of
document for CPU DVFS clocks on Mediatek MT8173 SoC.

Signed-off-by: Pi-Cheng Chen <pi-cheng.chen@linaro.org>
Acked-by: Michael Turquette <mturquette@baylibre.com>
Acked-by: Viresh Kumar <viresh.kumar@linaro.org>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 .../bindings/clock/mt8173-cpu-dvfs.txt        | 83 +++++++++++++++++++
 1 file changed, 83 insertions(+)
 create mode 100644 Documentation/devicetree/bindings/clock/mt8173-cpu-dvfs.txt

diff --git a/Documentation/devicetree/bindings/clock/mt8173-cpu-dvfs.txt b/Documentation/devicetree/bindings/clock/mt8173-cpu-dvfs.txt
new file mode 100644
index 000000000000..52b457c23eed
--- /dev/null
+++ b/Documentation/devicetree/bindings/clock/mt8173-cpu-dvfs.txt
@@ -0,0 +1,83 @@
+Device Tree Clock bindins for CPU DVFS of Mediatek MT8173 SoC
+
+Required properties:
+- clocks: A list of phandle + clock-specifier pairs for the clocks listed in clock names.
+- clock-names: Should contain the following:
+	"cpu"		- The multiplexer for clock input of CPU cluster.
+	"intermediate"	- A parent of "cpu" clock which is used as "intermediate" clock
+			  source (usually MAINPLL) when the original CPU PLL is under
+			  transition and not stable yet.
+	Please refer to Documentation/devicetree/bindings/clk/clock-bindings.txt for
+	generic clock consumer properties.
+- proc-supply: Regulator for Vproc of CPU cluster.
+
+Optional properties:
+- sram-supply: Regulator for Vsram of CPU cluster. When present, the cpufreq driver
+	       needs to do "voltage tracking" to step by step scale up/down Vproc and
+	       Vsram to fit SoC specific needs. When absent, the voltage scaling
+	       flow is handled by hardware, hence no software "voltage tracking" is
+	       needed.
+
+Example:
+--------
+	cpu0: cpu@0 {
+		device_type = "cpu";
+		compatible = "arm,cortex-a53";
+		reg = <0x000>;
+		enable-method = "psci";
+		cpu-idle-states = <&CPU_SLEEP_0>;
+		clocks = <&infracfg CLK_INFRA_CA53SEL>,
+			 <&apmixedsys CLK_APMIXED_MAINPLL>;
+		clock-names = "cpu", "intermediate";
+	};
+
+	cpu1: cpu@1 {
+		device_type = "cpu";
+		compatible = "arm,cortex-a53";
+		reg = <0x001>;
+		enable-method = "psci";
+		cpu-idle-states = <&CPU_SLEEP_0>;
+		clocks = <&infracfg CLK_INFRA_CA53SEL>,
+			 <&apmixedsys CLK_APMIXED_MAINPLL>;
+		clock-names = "cpu", "intermediate";
+	};
+
+	cpu2: cpu@100 {
+		device_type = "cpu";
+		compatible = "arm,cortex-a57";
+		reg = <0x100>;
+		enable-method = "psci";
+		cpu-idle-states = <&CPU_SLEEP_0>;
+		clocks = <&infracfg CLK_INFRA_CA57SEL>,
+			 <&apmixedsys CLK_APMIXED_MAINPLL>;
+		clock-names = "cpu", "intermediate";
+	};
+
+	cpu3: cpu@101 {
+		device_type = "cpu";
+		compatible = "arm,cortex-a57";
+		reg = <0x101>;
+		enable-method = "psci";
+		cpu-idle-states = <&CPU_SLEEP_0>;
+		clocks = <&infracfg CLK_INFRA_CA57SEL>,
+			 <&apmixedsys CLK_APMIXED_MAINPLL>;
+		clock-names = "cpu", "intermediate";
+	};
+
+	&cpu0 {
+		proc-supply = <&mt6397_vpca15_reg>;
+	};
+
+	&cpu1 {
+		proc-supply = <&mt6397_vpca15_reg>;
+	};
+
+	&cpu2 {
+		proc-supply = <&da9211_vcpu_reg>;
+		sram-supply = <&mt6397_vsramca7_reg>;
+	};
+
+	&cpu3 {
+		proc-supply = <&da9211_vcpu_reg>;
+		sram-supply = <&mt6397_vsramca7_reg>;
+	};

From 1453863fb02a18900c9079fa2e4f02710bf46507 Mon Sep 17 00:00:00 2001
From: Pi-Cheng Chen <pi-cheng.chen@linaro.org>
Date: Wed, 19 Aug 2015 10:05:06 +0800
Subject: [PATCH 43/53] cpufreq: mediatek: Add MT8173 cpufreq driver

Mediatek MT8173 is an ARMv8 based quad-core (2*Cortex-A53 and
2*Cortex-A72) SoC with duall clusters. For each cluster, two voltage
inputs, Vproc and Vsram are supplied by two regulators. For the big
cluster, two regulators come from different PMICs. In this case, when
scaling voltage inputs of the cluster, the voltages of two regulator
inputs need to be controlled by software explicitly under the SoC
specific limitation:

	100mV < Vsram - Vproc < 200mV

which is called 'voltage tracking' mechanism. And when scaling the
frequency of cluster clock input, the input MUX need to be parented to
another "intermediate" stable PLL first and reparented to the original
PLL once the original PLL is stable at the target frequency. This patch
implements those mechanisms to enable CPU DVFS support for Mediatek
MT8173 SoC.

Signed-off-by: Pi-Cheng Chen <pi-cheng.chen@linaro.org>
Acked-by: Viresh Kumar <viresh.kumar@linaro.org>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 drivers/cpufreq/Kconfig.arm      |   7 +
 drivers/cpufreq/Makefile         |   1 +
 drivers/cpufreq/mt8173-cpufreq.c | 527 +++++++++++++++++++++++++++++++
 3 files changed, 535 insertions(+)
 create mode 100644 drivers/cpufreq/mt8173-cpufreq.c

diff --git a/drivers/cpufreq/Kconfig.arm b/drivers/cpufreq/Kconfig.arm
index cc8a71c267b8..2bacf24a19a9 100644
--- a/drivers/cpufreq/Kconfig.arm
+++ b/drivers/cpufreq/Kconfig.arm
@@ -130,6 +130,13 @@ config ARM_KIRKWOOD_CPUFREQ
 	  This adds the CPUFreq driver for Marvell Kirkwood
 	  SoCs.
 
+config ARM_MT8173_CPUFREQ
+	bool "Mediatek MT8173 CPUFreq support"
+	depends on ARCH_MEDIATEK && REGULATOR
+	select PM_OPP
+	help
+	  This adds the CPUFreq driver support for Mediatek MT8173 SoC.
+
 config ARM_OMAP2PLUS_CPUFREQ
 	bool "TI OMAP2+"
 	depends on ARCH_OMAP2PLUS
diff --git a/drivers/cpufreq/Makefile b/drivers/cpufreq/Makefile
index 2169bf792db7..9c75fafd2901 100644
--- a/drivers/cpufreq/Makefile
+++ b/drivers/cpufreq/Makefile
@@ -62,6 +62,7 @@ obj-$(CONFIG_ARM_HISI_ACPU_CPUFREQ)	+= hisi-acpu-cpufreq.o
 obj-$(CONFIG_ARM_IMX6Q_CPUFREQ)		+= imx6q-cpufreq.o
 obj-$(CONFIG_ARM_INTEGRATOR)		+= integrator-cpufreq.o
 obj-$(CONFIG_ARM_KIRKWOOD_CPUFREQ)	+= kirkwood-cpufreq.o
+obj-$(CONFIG_ARM_MT8173_CPUFREQ)	+= mt8173-cpufreq.o
 obj-$(CONFIG_ARM_OMAP2PLUS_CPUFREQ)	+= omap-cpufreq.o
 obj-$(CONFIG_ARM_PXA2xx_CPUFREQ)	+= pxa2xx-cpufreq.o
 obj-$(CONFIG_PXA3xx)			+= pxa3xx-cpufreq.o
diff --git a/drivers/cpufreq/mt8173-cpufreq.c b/drivers/cpufreq/mt8173-cpufreq.c
new file mode 100644
index 000000000000..49caed293a3b
--- /dev/null
+++ b/drivers/cpufreq/mt8173-cpufreq.c
@@ -0,0 +1,527 @@
+/*
+ * Copyright (c) 2015 Linaro Ltd.
+ * Author: Pi-Cheng Chen <pi-cheng.chen@linaro.org>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ */
+
+#include <linux/clk.h>
+#include <linux/cpu.h>
+#include <linux/cpu_cooling.h>
+#include <linux/cpufreq.h>
+#include <linux/cpumask.h>
+#include <linux/of.h>
+#include <linux/platform_device.h>
+#include <linux/pm_opp.h>
+#include <linux/regulator/consumer.h>
+#include <linux/slab.h>
+#include <linux/thermal.h>
+
+#define MIN_VOLT_SHIFT		(100000)
+#define MAX_VOLT_SHIFT		(200000)
+#define MAX_VOLT_LIMIT		(1150000)
+#define VOLT_TOL		(10000)
+
+/*
+ * The struct mtk_cpu_dvfs_info holds necessary information for doing CPU DVFS
+ * on each CPU power/clock domain of Mediatek SoCs. Each CPU cluster in
+ * Mediatek SoCs has two voltage inputs, Vproc and Vsram. In some cases the two
+ * voltage inputs need to be controlled under a hardware limitation:
+ * 100mV < Vsram - Vproc < 200mV
+ *
+ * When scaling the clock frequency of a CPU clock domain, the clock source
+ * needs to be switched to another stable PLL clock temporarily until
+ * the original PLL becomes stable at target frequency.
+ */
+struct mtk_cpu_dvfs_info {
+	struct device *cpu_dev;
+	struct regulator *proc_reg;
+	struct regulator *sram_reg;
+	struct clk *cpu_clk;
+	struct clk *inter_clk;
+	struct thermal_cooling_device *cdev;
+	int intermediate_voltage;
+	bool need_voltage_tracking;
+};
+
+static int mtk_cpufreq_voltage_tracking(struct mtk_cpu_dvfs_info *info,
+					int new_vproc)
+{
+	struct regulator *proc_reg = info->proc_reg;
+	struct regulator *sram_reg = info->sram_reg;
+	int old_vproc, old_vsram, new_vsram, vsram, vproc, ret;
+
+	old_vproc = regulator_get_voltage(proc_reg);
+	old_vsram = regulator_get_voltage(sram_reg);
+	/* Vsram should not exceed the maximum allowed voltage of SoC. */
+	new_vsram = min(new_vproc + MIN_VOLT_SHIFT, MAX_VOLT_LIMIT);
+
+	if (old_vproc < new_vproc) {
+		/*
+		 * When scaling up voltages, Vsram and Vproc scale up step
+		 * by step. At each step, set Vsram to (Vproc + 200mV) first,
+		 * then set Vproc to (Vsram - 100mV).
+		 * Keep doing it until Vsram and Vproc hit target voltages.
+		 */
+		do {
+			old_vsram = regulator_get_voltage(sram_reg);
+			old_vproc = regulator_get_voltage(proc_reg);
+
+			vsram = min(new_vsram, old_vproc + MAX_VOLT_SHIFT);
+
+			if (vsram + VOLT_TOL >= MAX_VOLT_LIMIT) {
+				vsram = MAX_VOLT_LIMIT;
+
+				/*
+				 * If the target Vsram hits the maximum voltage,
+				 * try to set the exact voltage value first.
+				 */
+				ret = regulator_set_voltage(sram_reg, vsram,
+							    vsram);
+				if (ret)
+					ret = regulator_set_voltage(sram_reg,
+							vsram - VOLT_TOL,
+							vsram);
+
+				vproc = new_vproc;
+			} else {
+				ret = regulator_set_voltage(sram_reg, vsram,
+							    vsram + VOLT_TOL);
+
+				vproc = vsram - MIN_VOLT_SHIFT;
+			}
+			if (ret)
+				return ret;
+
+			ret = regulator_set_voltage(proc_reg, vproc,
+						    vproc + VOLT_TOL);
+			if (ret) {
+				regulator_set_voltage(sram_reg, old_vsram,
+						      old_vsram);
+				return ret;
+			}
+		} while (vproc < new_vproc || vsram < new_vsram);
+	} else if (old_vproc > new_vproc) {
+		/*
+		 * When scaling down voltages, Vsram and Vproc scale down step
+		 * by step. At each step, set Vproc to (Vsram - 200mV) first,
+		 * then set Vproc to (Vproc + 100mV).
+		 * Keep doing it until Vsram and Vproc hit target voltages.
+		 */
+		do {
+			old_vproc = regulator_get_voltage(proc_reg);
+			old_vsram = regulator_get_voltage(sram_reg);
+
+			vproc = max(new_vproc, old_vsram - MAX_VOLT_SHIFT);
+			ret = regulator_set_voltage(proc_reg, vproc,
+						    vproc + VOLT_TOL);
+			if (ret)
+				return ret;
+
+			if (vproc == new_vproc)
+				vsram = new_vsram;
+			else
+				vsram = max(new_vsram, vproc + MIN_VOLT_SHIFT);
+
+			if (vsram + VOLT_TOL >= MAX_VOLT_LIMIT) {
+				vsram = MAX_VOLT_LIMIT;
+
+				/*
+				 * If the target Vsram hits the maximum voltage,
+				 * try to set the exact voltage value first.
+				 */
+				ret = regulator_set_voltage(sram_reg, vsram,
+							    vsram);
+				if (ret)
+					ret = regulator_set_voltage(sram_reg,
+							vsram - VOLT_TOL,
+							vsram);
+			} else {
+				ret = regulator_set_voltage(sram_reg, vsram,
+							    vsram + VOLT_TOL);
+			}
+
+			if (ret) {
+				regulator_set_voltage(proc_reg, old_vproc,
+						      old_vproc);
+				return ret;
+			}
+		} while (vproc > new_vproc + VOLT_TOL ||
+			 vsram > new_vsram + VOLT_TOL);
+	}
+
+	return 0;
+}
+
+static int mtk_cpufreq_set_voltage(struct mtk_cpu_dvfs_info *info, int vproc)
+{
+	if (info->need_voltage_tracking)
+		return mtk_cpufreq_voltage_tracking(info, vproc);
+	else
+		return regulator_set_voltage(info->proc_reg, vproc,
+					     vproc + VOLT_TOL);
+}
+
+static int mtk_cpufreq_set_target(struct cpufreq_policy *policy,
+				  unsigned int index)
+{
+	struct cpufreq_frequency_table *freq_table = policy->freq_table;
+	struct clk *cpu_clk = policy->clk;
+	struct clk *armpll = clk_get_parent(cpu_clk);
+	struct mtk_cpu_dvfs_info *info = policy->driver_data;
+	struct device *cpu_dev = info->cpu_dev;
+	struct dev_pm_opp *opp;
+	long freq_hz, old_freq_hz;
+	int vproc, old_vproc, inter_vproc, target_vproc, ret;
+
+	inter_vproc = info->intermediate_voltage;
+
+	old_freq_hz = clk_get_rate(cpu_clk);
+	old_vproc = regulator_get_voltage(info->proc_reg);
+
+	freq_hz = freq_table[index].frequency * 1000;
+
+	rcu_read_lock();
+	opp = dev_pm_opp_find_freq_ceil(cpu_dev, &freq_hz);
+	if (IS_ERR(opp)) {
+		rcu_read_unlock();
+		pr_err("cpu%d: failed to find OPP for %ld\n",
+		       policy->cpu, freq_hz);
+		return PTR_ERR(opp);
+	}
+	vproc = dev_pm_opp_get_voltage(opp);
+	rcu_read_unlock();
+
+	/*
+	 * If the new voltage or the intermediate voltage is higher than the
+	 * current voltage, scale up voltage first.
+	 */
+	target_vproc = (inter_vproc > vproc) ? inter_vproc : vproc;
+	if (old_vproc < target_vproc) {
+		ret = mtk_cpufreq_set_voltage(info, target_vproc);
+		if (ret) {
+			pr_err("cpu%d: failed to scale up voltage!\n",
+			       policy->cpu);
+			mtk_cpufreq_set_voltage(info, old_vproc);
+			return ret;
+		}
+	}
+
+	/* Reparent the CPU clock to intermediate clock. */
+	ret = clk_set_parent(cpu_clk, info->inter_clk);
+	if (ret) {
+		pr_err("cpu%d: failed to re-parent cpu clock!\n",
+		       policy->cpu);
+		mtk_cpufreq_set_voltage(info, old_vproc);
+		WARN_ON(1);
+		return ret;
+	}
+
+	/* Set the original PLL to target rate. */
+	ret = clk_set_rate(armpll, freq_hz);
+	if (ret) {
+		pr_err("cpu%d: failed to scale cpu clock rate!\n",
+		       policy->cpu);
+		clk_set_parent(cpu_clk, armpll);
+		mtk_cpufreq_set_voltage(info, old_vproc);
+		return ret;
+	}
+
+	/* Set parent of CPU clock back to the original PLL. */
+	ret = clk_set_parent(cpu_clk, armpll);
+	if (ret) {
+		pr_err("cpu%d: failed to re-parent cpu clock!\n",
+		       policy->cpu);
+		mtk_cpufreq_set_voltage(info, inter_vproc);
+		WARN_ON(1);
+		return ret;
+	}
+
+	/*
+	 * If the new voltage is lower than the intermediate voltage or the
+	 * original voltage, scale down to the new voltage.
+	 */
+	if (vproc < inter_vproc || vproc < old_vproc) {
+		ret = mtk_cpufreq_set_voltage(info, vproc);
+		if (ret) {
+			pr_err("cpu%d: failed to scale down voltage!\n",
+			       policy->cpu);
+			clk_set_parent(cpu_clk, info->inter_clk);
+			clk_set_rate(armpll, old_freq_hz);
+			clk_set_parent(cpu_clk, armpll);
+			return ret;
+		}
+	}
+
+	return 0;
+}
+
+static void mtk_cpufreq_ready(struct cpufreq_policy *policy)
+{
+	struct mtk_cpu_dvfs_info *info = policy->driver_data;
+	struct device_node *np = of_node_get(info->cpu_dev->of_node);
+
+	if (WARN_ON(!np))
+		return;
+
+	if (of_find_property(np, "#cooling-cells", NULL)) {
+		info->cdev = of_cpufreq_cooling_register(np,
+							 policy->related_cpus);
+
+		if (IS_ERR(info->cdev)) {
+			dev_err(info->cpu_dev,
+				"running cpufreq without cooling device: %ld\n",
+				PTR_ERR(info->cdev));
+
+			info->cdev = NULL;
+		}
+	}
+
+	of_node_put(np);
+}
+
+static int mtk_cpu_dvfs_info_init(struct mtk_cpu_dvfs_info *info, int cpu)
+{
+	struct device *cpu_dev;
+	struct regulator *proc_reg = ERR_PTR(-ENODEV);
+	struct regulator *sram_reg = ERR_PTR(-ENODEV);
+	struct clk *cpu_clk = ERR_PTR(-ENODEV);
+	struct clk *inter_clk = ERR_PTR(-ENODEV);
+	struct dev_pm_opp *opp;
+	unsigned long rate;
+	int ret;
+
+	cpu_dev = get_cpu_device(cpu);
+	if (!cpu_dev) {
+		pr_err("failed to get cpu%d device\n", cpu);
+		return -ENODEV;
+	}
+
+	cpu_clk = clk_get(cpu_dev, "cpu");
+	if (IS_ERR(cpu_clk)) {
+		if (PTR_ERR(cpu_clk) == -EPROBE_DEFER)
+			pr_warn("cpu clk for cpu%d not ready, retry.\n", cpu);
+		else
+			pr_err("failed to get cpu clk for cpu%d\n", cpu);
+
+		ret = PTR_ERR(cpu_clk);
+		return ret;
+	}
+
+	inter_clk = clk_get(cpu_dev, "intermediate");
+	if (IS_ERR(inter_clk)) {
+		if (PTR_ERR(inter_clk) == -EPROBE_DEFER)
+			pr_warn("intermediate clk for cpu%d not ready, retry.\n",
+				cpu);
+		else
+			pr_err("failed to get intermediate clk for cpu%d\n",
+			       cpu);
+
+		ret = PTR_ERR(inter_clk);
+		goto out_free_resources;
+	}
+
+	proc_reg = regulator_get_exclusive(cpu_dev, "proc");
+	if (IS_ERR(proc_reg)) {
+		if (PTR_ERR(proc_reg) == -EPROBE_DEFER)
+			pr_warn("proc regulator for cpu%d not ready, retry.\n",
+				cpu);
+		else
+			pr_err("failed to get proc regulator for cpu%d\n",
+			       cpu);
+
+		ret = PTR_ERR(proc_reg);
+		goto out_free_resources;
+	}
+
+	/* Both presence and absence of sram regulator are valid cases. */
+	sram_reg = regulator_get_exclusive(cpu_dev, "sram");
+
+	ret = of_init_opp_table(cpu_dev);
+	if (ret) {
+		pr_warn("no OPP table for cpu%d\n", cpu);
+		goto out_free_resources;
+	}
+
+	/* Search a safe voltage for intermediate frequency. */
+	rate = clk_get_rate(inter_clk);
+	rcu_read_lock();
+	opp = dev_pm_opp_find_freq_ceil(cpu_dev, &rate);
+	if (IS_ERR(opp)) {
+		rcu_read_unlock();
+		pr_err("failed to get intermediate opp for cpu%d\n", cpu);
+		ret = PTR_ERR(opp);
+		goto out_free_opp_table;
+	}
+	info->intermediate_voltage = dev_pm_opp_get_voltage(opp);
+	rcu_read_unlock();
+
+	info->cpu_dev = cpu_dev;
+	info->proc_reg = proc_reg;
+	info->sram_reg = IS_ERR(sram_reg) ? NULL : sram_reg;
+	info->cpu_clk = cpu_clk;
+	info->inter_clk = inter_clk;
+
+	/*
+	 * If SRAM regulator is present, software "voltage tracking" is needed
+	 * for this CPU power domain.
+	 */
+	info->need_voltage_tracking = !IS_ERR(sram_reg);
+
+	return 0;
+
+out_free_opp_table:
+	of_free_opp_table(cpu_dev);
+
+out_free_resources:
+	if (!IS_ERR(proc_reg))
+		regulator_put(proc_reg);
+	if (!IS_ERR(sram_reg))
+		regulator_put(sram_reg);
+	if (!IS_ERR(cpu_clk))
+		clk_put(cpu_clk);
+	if (!IS_ERR(inter_clk))
+		clk_put(inter_clk);
+
+	return ret;
+}
+
+static void mtk_cpu_dvfs_info_release(struct mtk_cpu_dvfs_info *info)
+{
+	if (!IS_ERR(info->proc_reg))
+		regulator_put(info->proc_reg);
+	if (!IS_ERR(info->sram_reg))
+		regulator_put(info->sram_reg);
+	if (!IS_ERR(info->cpu_clk))
+		clk_put(info->cpu_clk);
+	if (!IS_ERR(info->inter_clk))
+		clk_put(info->inter_clk);
+
+	of_free_opp_table(info->cpu_dev);
+}
+
+static int mtk_cpufreq_init(struct cpufreq_policy *policy)
+{
+	struct mtk_cpu_dvfs_info *info;
+	struct cpufreq_frequency_table *freq_table;
+	int ret;
+
+	info = kzalloc(sizeof(*info), GFP_KERNEL);
+	if (!info)
+		return -ENOMEM;
+
+	ret = mtk_cpu_dvfs_info_init(info, policy->cpu);
+	if (ret) {
+		pr_err("%s failed to initialize dvfs info for cpu%d\n",
+		       __func__, policy->cpu);
+		goto out_free_dvfs_info;
+	}
+
+	ret = dev_pm_opp_init_cpufreq_table(info->cpu_dev, &freq_table);
+	if (ret) {
+		pr_err("failed to init cpufreq table for cpu%d: %d\n",
+		       policy->cpu, ret);
+		goto out_release_dvfs_info;
+	}
+
+	ret = cpufreq_table_validate_and_show(policy, freq_table);
+	if (ret) {
+		pr_err("%s: invalid frequency table: %d\n", __func__, ret);
+		goto out_free_cpufreq_table;
+	}
+
+	/* CPUs in the same cluster share a clock and power domain. */
+	cpumask_copy(policy->cpus, &cpu_topology[policy->cpu].core_sibling);
+	policy->driver_data = info;
+	policy->clk = info->cpu_clk;
+
+	return 0;
+
+out_free_cpufreq_table:
+	dev_pm_opp_free_cpufreq_table(info->cpu_dev, &freq_table);
+
+out_release_dvfs_info:
+	mtk_cpu_dvfs_info_release(info);
+
+out_free_dvfs_info:
+	kfree(info);
+
+	return ret;
+}
+
+static int mtk_cpufreq_exit(struct cpufreq_policy *policy)
+{
+	struct mtk_cpu_dvfs_info *info = policy->driver_data;
+
+	cpufreq_cooling_unregister(info->cdev);
+	dev_pm_opp_free_cpufreq_table(info->cpu_dev, &policy->freq_table);
+	mtk_cpu_dvfs_info_release(info);
+	kfree(info);
+
+	return 0;
+}
+
+static struct cpufreq_driver mt8173_cpufreq_driver = {
+	.flags = CPUFREQ_STICKY | CPUFREQ_NEED_INITIAL_FREQ_CHECK,
+	.verify = cpufreq_generic_frequency_table_verify,
+	.target_index = mtk_cpufreq_set_target,
+	.get = cpufreq_generic_get,
+	.init = mtk_cpufreq_init,
+	.exit = mtk_cpufreq_exit,
+	.ready = mtk_cpufreq_ready,
+	.name = "mtk-cpufreq",
+	.attr = cpufreq_generic_attr,
+};
+
+static int mt8173_cpufreq_probe(struct platform_device *pdev)
+{
+	int ret;
+
+	ret = cpufreq_register_driver(&mt8173_cpufreq_driver);
+	if (ret)
+		pr_err("failed to register mtk cpufreq driver\n");
+
+	return ret;
+}
+
+static struct platform_driver mt8173_cpufreq_platdrv = {
+	.driver = {
+		.name	= "mt8173-cpufreq",
+	},
+	.probe		= mt8173_cpufreq_probe,
+};
+
+static int mt8173_cpufreq_driver_init(void)
+{
+	struct platform_device *pdev;
+	int err;
+
+	if (!of_machine_is_compatible("mediatek,mt8173"))
+		return -ENODEV;
+
+	err = platform_driver_register(&mt8173_cpufreq_platdrv);
+	if (err)
+		return err;
+
+	/*
+	 * Since there's no place to hold device registration code and no
+	 * device tree based way to match cpufreq driver yet, both the driver
+	 * and the device registration codes are put here to handle defer
+	 * probing.
+	 */
+	pdev = platform_device_register_simple("mt8173-cpufreq", -1, NULL, 0);
+	if (IS_ERR(pdev)) {
+		pr_err("failed to register mtk-cpufreq platform device\n");
+		return PTR_ERR(pdev);
+	}
+
+	return 0;
+}
+device_initcall(mt8173_cpufreq_driver_init);

From 6bfb7c7434f75d29241413dc7e784295ba56de98 Mon Sep 17 00:00:00 2001
From: Viresh Kumar <viresh.kumar@linaro.org>
Date: Mon, 3 Aug 2015 08:36:14 +0530
Subject: [PATCH 44/53] cpufreq: remove redundant CPUFREQ_INCOMPATIBLE notifier
 event

What's being done from CPUFREQ_INCOMPATIBLE, can also be done with
CPUFREQ_ADJUST. There is nothing special with CPUFREQ_INCOMPATIBLE
notifier.

Kill CPUFREQ_INCOMPATIBLE and fix its usage sites.

This also updates the numbering of notifier events to remove holes.

Signed-off-by: Viresh Kumar <viresh.kumar@linaro.org>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 Documentation/cpu-freq/core.txt       | 7 ++-----
 drivers/acpi/processor_perflib.c      | 2 +-
 drivers/cpufreq/cpufreq.c             | 4 ----
 drivers/cpufreq/ppc_cbe_cpufreq_pmi.c | 4 ++--
 drivers/video/fbdev/pxafb.c           | 1 -
 drivers/video/fbdev/sa1100fb.c        | 1 -
 include/linux/cpufreq.h               | 9 ++++-----
 7 files changed, 9 insertions(+), 19 deletions(-)

diff --git a/Documentation/cpu-freq/core.txt b/Documentation/cpu-freq/core.txt
index 70933eadc308..ba78e7c2a069 100644
--- a/Documentation/cpu-freq/core.txt
+++ b/Documentation/cpu-freq/core.txt
@@ -55,16 +55,13 @@ transition notifiers.
 ----------------------------
 
 These are notified when a new policy is intended to be set. Each
-CPUFreq policy notifier is called three times for a policy transition:
+CPUFreq policy notifier is called twice for a policy transition:
 
 1.) During CPUFREQ_ADJUST all CPUFreq notifiers may change the limit if
     they see a need for this - may it be thermal considerations or
     hardware limitations.
 
-2.) During CPUFREQ_INCOMPATIBLE only changes may be done in order to avoid
-    hardware failure.
-
-3.) And during CPUFREQ_NOTIFY all notifiers are informed of the new policy
+2.) And during CPUFREQ_NOTIFY all notifiers are informed of the new policy
    - if two hardware drivers failed to agree on a new policy before this
    stage, the incompatible hardware shall be shut down, and the user
    informed of this.
diff --git a/drivers/acpi/processor_perflib.c b/drivers/acpi/processor_perflib.c
index 36b6da2918a6..91941f1cdecb 100644
--- a/drivers/acpi/processor_perflib.c
+++ b/drivers/acpi/processor_perflib.c
@@ -87,7 +87,7 @@ static int acpi_processor_ppc_notifier(struct notifier_block *nb,
 	if (ignore_ppc)
 		return 0;
 
-	if (event != CPUFREQ_INCOMPATIBLE)
+	if (event != CPUFREQ_ADJUST)
 		return 0;
 
 	mutex_lock(&performance_mutex);
diff --git a/drivers/cpufreq/cpufreq.c b/drivers/cpufreq/cpufreq.c
index 76a26609d96b..293f47b814bf 100644
--- a/drivers/cpufreq/cpufreq.c
+++ b/drivers/cpufreq/cpufreq.c
@@ -2206,10 +2206,6 @@ static int cpufreq_set_policy(struct cpufreq_policy *policy,
 	blocking_notifier_call_chain(&cpufreq_policy_notifier_list,
 			CPUFREQ_ADJUST, new_policy);
 
-	/* adjust if necessary - hardware incompatibility*/
-	blocking_notifier_call_chain(&cpufreq_policy_notifier_list,
-			CPUFREQ_INCOMPATIBLE, new_policy);
-
 	/*
 	 * verify the cpu speed can be set within this limit, which might be
 	 * different to the first one
diff --git a/drivers/cpufreq/ppc_cbe_cpufreq_pmi.c b/drivers/cpufreq/ppc_cbe_cpufreq_pmi.c
index d29e8da396a0..7969f7690498 100644
--- a/drivers/cpufreq/ppc_cbe_cpufreq_pmi.c
+++ b/drivers/cpufreq/ppc_cbe_cpufreq_pmi.c
@@ -97,8 +97,8 @@ static int pmi_notifier(struct notifier_block *nb,
 	struct cpufreq_frequency_table *cbe_freqs;
 	u8 node;
 
-	/* Should this really be called for CPUFREQ_ADJUST, CPUFREQ_INCOMPATIBLE
-	 * and CPUFREQ_NOTIFY policy events?)
+	/* Should this really be called for CPUFREQ_ADJUST and CPUFREQ_NOTIFY
+	 * policy events?)
 	 */
 	if (event == CPUFREQ_START)
 		return 0;
diff --git a/drivers/video/fbdev/pxafb.c b/drivers/video/fbdev/pxafb.c
index 7245611ec963..94813af97f09 100644
--- a/drivers/video/fbdev/pxafb.c
+++ b/drivers/video/fbdev/pxafb.c
@@ -1668,7 +1668,6 @@ pxafb_freq_policy(struct notifier_block *nb, unsigned long val, void *data)
 
 	switch (val) {
 	case CPUFREQ_ADJUST:
-	case CPUFREQ_INCOMPATIBLE:
 		pr_debug("min dma period: %d ps, "
 			"new clock %d kHz\n", pxafb_display_dma_period(var),
 			policy->max);
diff --git a/drivers/video/fbdev/sa1100fb.c b/drivers/video/fbdev/sa1100fb.c
index 89dd7e02197f..dcf774c15889 100644
--- a/drivers/video/fbdev/sa1100fb.c
+++ b/drivers/video/fbdev/sa1100fb.c
@@ -1042,7 +1042,6 @@ sa1100fb_freq_policy(struct notifier_block *nb, unsigned long val,
 
 	switch (val) {
 	case CPUFREQ_ADJUST:
-	case CPUFREQ_INCOMPATIBLE:
 		dev_dbg(fbi->dev, "min dma period: %d ps, "
 			"new clock %d kHz\n", sa1100fb_min_dma_period(fbi),
 			policy->max);
diff --git a/include/linux/cpufreq.h b/include/linux/cpufreq.h
index bde1e567b3a9..bedcc90c0757 100644
--- a/include/linux/cpufreq.h
+++ b/include/linux/cpufreq.h
@@ -369,11 +369,10 @@ static inline void cpufreq_resume(void) {}
 
 /* Policy Notifiers  */
 #define CPUFREQ_ADJUST			(0)
-#define CPUFREQ_INCOMPATIBLE		(1)
-#define CPUFREQ_NOTIFY			(2)
-#define CPUFREQ_START			(3)
-#define CPUFREQ_CREATE_POLICY		(4)
-#define CPUFREQ_REMOVE_POLICY		(5)
+#define CPUFREQ_NOTIFY			(1)
+#define CPUFREQ_START			(2)
+#define CPUFREQ_CREATE_POLICY		(3)
+#define CPUFREQ_REMOVE_POLICY		(4)
 
 #ifdef CONFIG_CPU_FREQ
 int cpufreq_register_notifier(struct notifier_block *nb, unsigned int list);

From 8fa5b631f32238a16ae3db0db5b354f7b9eb20cb Mon Sep 17 00:00:00 2001
From: Viresh Kumar <viresh.kumar@linaro.org>
Date: Mon, 3 Aug 2015 08:36:15 +0530
Subject: [PATCH 45/53] cpufreq: use memcpy() to copy policy

cpufreq_get_policy() is useful if the pointer to policy isn't available
in advance. But if it is available, then there is no need to call
cpufreq_get_policy(). Directly use memcpy() to copy the policy.

Signed-off-by: Viresh Kumar <viresh.kumar@linaro.org>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 drivers/cpufreq/cpufreq.c | 8 ++------
 1 file changed, 2 insertions(+), 6 deletions(-)

diff --git a/drivers/cpufreq/cpufreq.c b/drivers/cpufreq/cpufreq.c
index 293f47b814bf..86d69416821b 100644
--- a/drivers/cpufreq/cpufreq.c
+++ b/drivers/cpufreq/cpufreq.c
@@ -606,9 +606,7 @@ static ssize_t store_##file_name					\
 	int ret, temp;							\
 	struct cpufreq_policy new_policy;				\
 									\
-	ret = cpufreq_get_policy(&new_policy, policy->cpu);		\
-	if (ret)							\
-		return -EINVAL;						\
+	memcpy(&new_policy, policy, sizeof(*policy));			\
 									\
 	ret = sscanf(buf, "%u", &new_policy.object);			\
 	if (ret != 1)							\
@@ -662,9 +660,7 @@ static ssize_t store_scaling_governor(struct cpufreq_policy *policy,
 	char	str_governor[16];
 	struct cpufreq_policy new_policy;
 
-	ret = cpufreq_get_policy(&new_policy, policy->cpu);
-	if (ret)
-		return ret;
+	memcpy(&new_policy, policy, sizeof(*policy));
 
 	ret = sscanf(buf, "%15s", str_governor);
 	if (ret != 1)

From 14ca0bdfdd6b422027b9b733abb0bf151811eaa7 Mon Sep 17 00:00:00 2001
From: Viresh Kumar <viresh.kumar@linaro.org>
Date: Mon, 3 Aug 2015 08:36:16 +0530
Subject: [PATCH 46/53] cpufreq: update user_policy.* on success

'user_policy' caches properties of a policy that are set by userspace.
And these must be updated only if cpufreq core was successful in
updating them based on request from user space.

In store_scaling_governor(), we are updating user_policy.policy and
user_policy.governor even if cpufreq_set_policy() failed. That's
incorrect.

Fix this by updating user_policy.* only if we were successful in
updating the properties.

Signed-off-by: Viresh Kumar <viresh.kumar@linaro.org>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 drivers/cpufreq/cpufreq.c | 8 +++-----
 1 file changed, 3 insertions(+), 5 deletions(-)

diff --git a/drivers/cpufreq/cpufreq.c b/drivers/cpufreq/cpufreq.c
index 86d69416821b..8e71d8e08439 100644
--- a/drivers/cpufreq/cpufreq.c
+++ b/drivers/cpufreq/cpufreq.c
@@ -671,14 +671,12 @@ static ssize_t store_scaling_governor(struct cpufreq_policy *policy,
 		return -EINVAL;
 
 	ret = cpufreq_set_policy(policy, &new_policy);
+	if (ret)
+		return ret;
 
 	policy->user_policy.policy = policy->policy;
 	policy->user_policy.governor = policy->governor;
-
-	if (ret)
-		return ret;
-	else
-		return count;
+	return count;
 }
 
 /**

From e27f8bd248756310a6df8b67f96d41d5a693642c Mon Sep 17 00:00:00 2001
From: Viresh Kumar <viresh.kumar@linaro.org>
Date: Mon, 3 Aug 2015 08:36:17 +0530
Subject: [PATCH 47/53] cpufreq: remove redundant 'governor' field from
 user_policy

Its always same as policy->governor, and there is no need to keep
another copy of it. Remove it.

Signed-off-by: Viresh Kumar <viresh.kumar@linaro.org>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 drivers/cpufreq/cpufreq.c | 7 ++-----
 include/linux/cpufreq.h   | 1 -
 2 files changed, 2 insertions(+), 6 deletions(-)

diff --git a/drivers/cpufreq/cpufreq.c b/drivers/cpufreq/cpufreq.c
index 8e71d8e08439..3033952391fe 100644
--- a/drivers/cpufreq/cpufreq.c
+++ b/drivers/cpufreq/cpufreq.c
@@ -675,7 +675,6 @@ static ssize_t store_scaling_governor(struct cpufreq_policy *policy,
 		return ret;
 
 	policy->user_policy.policy = policy->policy;
-	policy->user_policy.governor = policy->governor;
 	return count;
 }
 
@@ -1323,10 +1322,9 @@ static int cpufreq_online(unsigned int cpu)
 		goto out_exit_policy;
 	}
 
-	if (new_policy) {
+	if (new_policy)
 		policy->user_policy.policy = policy->policy;
-		policy->user_policy.governor = policy->governor;
-	}
+
 	up_write(&policy->rwsem);
 
 	kobject_uevent(&policy->kobj, KOBJ_ADD);
@@ -2305,7 +2303,6 @@ int cpufreq_update_policy(unsigned int cpu)
 	new_policy.min = policy->user_policy.min;
 	new_policy.max = policy->user_policy.max;
 	new_policy.policy = policy->user_policy.policy;
-	new_policy.governor = policy->user_policy.governor;
 
 	/*
 	 * BIOS might change freq behind our back
diff --git a/include/linux/cpufreq.h b/include/linux/cpufreq.h
index bedcc90c0757..752bf8a5c314 100644
--- a/include/linux/cpufreq.h
+++ b/include/linux/cpufreq.h
@@ -55,7 +55,6 @@ struct cpufreq_real_policy {
 	unsigned int		min;    /* in kHz */
 	unsigned int		max;    /* in kHz */
 	unsigned int		policy; /* see above */
-	struct cpufreq_governor	*governor; /* see below */
 };
 
 struct cpufreq_policy {

From 88dc4384958759510db248bf9158434ac783d407 Mon Sep 17 00:00:00 2001
From: Viresh Kumar <viresh.kumar@linaro.org>
Date: Mon, 3 Aug 2015 08:36:18 +0530
Subject: [PATCH 48/53] cpufreq: remove redundant 'policy' field from
 user_policy

Its always same as policy->policy, and there is no need to keep another
copy of it. Remove it.

Signed-off-by: Viresh Kumar <viresh.kumar@linaro.org>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 drivers/cpufreq/cpufreq.c | 10 +---------
 include/linux/cpufreq.h   |  1 -
 2 files changed, 1 insertion(+), 10 deletions(-)

diff --git a/drivers/cpufreq/cpufreq.c b/drivers/cpufreq/cpufreq.c
index 3033952391fe..a80bd68bbd74 100644
--- a/drivers/cpufreq/cpufreq.c
+++ b/drivers/cpufreq/cpufreq.c
@@ -671,11 +671,7 @@ static ssize_t store_scaling_governor(struct cpufreq_policy *policy,
 		return -EINVAL;
 
 	ret = cpufreq_set_policy(policy, &new_policy);
-	if (ret)
-		return ret;
-
-	policy->user_policy.policy = policy->policy;
-	return count;
+	return ret ? ret : count;
 }
 
 /**
@@ -1322,9 +1318,6 @@ static int cpufreq_online(unsigned int cpu)
 		goto out_exit_policy;
 	}
 
-	if (new_policy)
-		policy->user_policy.policy = policy->policy;
-
 	up_write(&policy->rwsem);
 
 	kobject_uevent(&policy->kobj, KOBJ_ADD);
@@ -2302,7 +2295,6 @@ int cpufreq_update_policy(unsigned int cpu)
 	memcpy(&new_policy, policy, sizeof(*policy));
 	new_policy.min = policy->user_policy.min;
 	new_policy.max = policy->user_policy.max;
-	new_policy.policy = policy->user_policy.policy;
 
 	/*
 	 * BIOS might change freq behind our back
diff --git a/include/linux/cpufreq.h b/include/linux/cpufreq.h
index 752bf8a5c314..54dbbff0a55e 100644
--- a/include/linux/cpufreq.h
+++ b/include/linux/cpufreq.h
@@ -54,7 +54,6 @@ struct cpufreq_cpuinfo {
 struct cpufreq_real_policy {
 	unsigned int		min;    /* in kHz */
 	unsigned int		max;    /* in kHz */
-	unsigned int		policy; /* see above */
 };
 
 struct cpufreq_policy {

From c7a7b418dd1991079dd7ef03fec7d1863ef96154 Mon Sep 17 00:00:00 2001
From: Viresh Kumar <viresh.kumar@linaro.org>
Date: Mon, 3 Aug 2015 08:36:19 +0530
Subject: [PATCH 49/53] cpufreq: rename cpufreq_real_policy as
 cpufreq_user_policy

Its all about caching min/max freq requested by userspace, and
the name 'cpufreq_real_policy' doesn't fit that well. Rename it to
cpufreq_user_policy.

Signed-off-by: Viresh Kumar <viresh.kumar@linaro.org>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 include/linux/cpufreq.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/include/linux/cpufreq.h b/include/linux/cpufreq.h
index 54dbbff0a55e..6ff6a4d95eea 100644
--- a/include/linux/cpufreq.h
+++ b/include/linux/cpufreq.h
@@ -51,7 +51,7 @@ struct cpufreq_cpuinfo {
 	unsigned int		transition_latency;
 };
 
-struct cpufreq_real_policy {
+struct cpufreq_user_policy {
 	unsigned int		min;    /* in kHz */
 	unsigned int		max;    /* in kHz */
 };
@@ -86,7 +86,7 @@ struct cpufreq_policy {
 	struct work_struct	update; /* if update_policy() needs to be
 					 * called, but you're in IRQ context */
 
-	struct cpufreq_real_policy	user_policy;
+	struct cpufreq_user_policy user_policy;
 	struct cpufreq_frequency_table	*freq_table;
 
 	struct list_head        policy_list;

From 36dfef23cd26a6d3b71dd86509e34d311f1cd906 Mon Sep 17 00:00:00 2001
From: Viresh Kumar <viresh.kumar@linaro.org>
Date: Mon, 3 Aug 2015 08:36:20 +0530
Subject: [PATCH 50/53] cpufreq: drop !cpufreq_driver check from
 cpufreq_parse_governor()

Driver is guaranteed to be present on a call to cpufreq_parse_governor()
and there is no need to check for !cpufreq_driver. Drop it.

Signed-off-by: Viresh Kumar <viresh.kumar@linaro.org>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 drivers/cpufreq/cpufreq.c | 4 ----
 1 file changed, 4 deletions(-)

diff --git a/drivers/cpufreq/cpufreq.c b/drivers/cpufreq/cpufreq.c
index a80bd68bbd74..a05cc75cc45d 100644
--- a/drivers/cpufreq/cpufreq.c
+++ b/drivers/cpufreq/cpufreq.c
@@ -520,9 +520,6 @@ static int cpufreq_parse_governor(char *str_governor, unsigned int *policy,
 {
 	int err = -EINVAL;
 
-	if (!cpufreq_driver)
-		goto out;
-
 	if (cpufreq_driver->setpolicy) {
 		if (!strncasecmp(str_governor, "performance", CPUFREQ_NAME_LEN)) {
 			*policy = CPUFREQ_POLICY_PERFORMANCE;
@@ -557,7 +554,6 @@ static int cpufreq_parse_governor(char *str_governor, unsigned int *policy,
 
 		mutex_unlock(&cpufreq_governor_mutex);
 	}
-out:
 	return err;
 }
 

From a482e5562e48d89ea50f41f9fc6ed3a9768de2ff Mon Sep 17 00:00:00 2001
From: Andrzej Hajda <a.hajda@samsung.com>
Date: Fri, 7 Aug 2015 09:59:17 +0200
Subject: [PATCH 51/53] cpufreq: sfi: use kmemdup rather than duplicating its
 implementation

The patch was generated using fixed coccinelle semantic patch
scripts/coccinelle/api/memdup.cocci [1].

[1]: http://permalink.gmane.org/gmane.linux.kernel/2014320

Signed-off-by: Andrzej Hajda <a.hajda@samsung.com>
Acked-by: Viresh Kumar <viresh.kumar@linaro.org>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 drivers/cpufreq/sfi-cpufreq.c | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/drivers/cpufreq/sfi-cpufreq.c b/drivers/cpufreq/sfi-cpufreq.c
index ffa3389e535b..992ce6f9abec 100644
--- a/drivers/cpufreq/sfi-cpufreq.c
+++ b/drivers/cpufreq/sfi-cpufreq.c
@@ -45,12 +45,10 @@ static int sfi_parse_freq(struct sfi_table_header *table)
 	pentry = (struct sfi_freq_table_entry *)sb->pentry;
 	totallen = num_freq_table_entries * sizeof(*pentry);
 
-	sfi_cpufreq_array = kzalloc(totallen, GFP_KERNEL);
+	sfi_cpufreq_array = kmemdup(pentry, totallen, GFP_KERNEL);
 	if (!sfi_cpufreq_array)
 		return -ENOMEM;
 
-	memcpy(sfi_cpufreq_array, pentry, totallen);
-
 	return 0;
 }
 

From 309d0631cc329ca1051c631c89e0acc9b752cb4d Mon Sep 17 00:00:00 2001
From: Shilpasri G Bhat <shilpa.bhat@linux.vnet.ibm.com>
Date: Thu, 27 Aug 2015 14:41:44 +0530
Subject: [PATCH 52/53] cpufreq: powernv: Increase the verbosity of OCC console
 messages

Modify the OCC reset/load/active event message to make it clearer for
the user to understand the event and effect of the event.

Suggested-by: Stewart Smith <stewart@linux.vnet.ibm.com>
Signed-off-by: Shilpasri G Bhat <shilpa.bhat@linux.vnet.ibm.com>
Acked-by: Viresh Kumar <viresh.kumar@linaro.org>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 drivers/cpufreq/powernv-cpufreq.c | 9 +++++----
 1 file changed, 5 insertions(+), 4 deletions(-)

diff --git a/drivers/cpufreq/powernv-cpufreq.c b/drivers/cpufreq/powernv-cpufreq.c
index 546e056e416d..64994e10638e 100644
--- a/drivers/cpufreq/powernv-cpufreq.c
+++ b/drivers/cpufreq/powernv-cpufreq.c
@@ -465,6 +465,7 @@ static int powernv_cpufreq_occ_msg(struct notifier_block *nb,
 	switch (omsg.type) {
 	case OCC_RESET:
 		occ_reset = true;
+		pr_info("OCC (On Chip Controller - enforces hard thermal/power limits) Resetting\n");
 		/*
 		 * powernv_cpufreq_throttle_check() is called in
 		 * target() callback which can detect the throttle state
@@ -474,12 +475,12 @@ static int powernv_cpufreq_occ_msg(struct notifier_block *nb,
 		 */
 		if (!throttled) {
 			throttled = true;
-			pr_crit("CPU Frequency is throttled\n");
+			pr_crit("CPU frequency is throttled for duration\n");
 		}
-		pr_info("OCC: Reset\n");
+
 		break;
 	case OCC_LOAD:
-		pr_info("OCC: Loaded\n");
+		pr_info("OCC Loading, CPU frequency is throttled until OCC is started\n");
 		break;
 	case OCC_THROTTLE:
 		omsg.chip = be64_to_cpu(msg->params[1]);
@@ -488,7 +489,7 @@ static int powernv_cpufreq_occ_msg(struct notifier_block *nb,
 		if (occ_reset) {
 			occ_reset = false;
 			throttled = false;
-			pr_info("OCC: Active\n");
+			pr_info("OCC Active, CPU frequency is no longer throttled\n");
 
 			for (i = 0; i < nr_chips; i++) {
 				chips[i].restore = true;

From 72e624de6e6f0d5a638fbc23842aa76ae048e9e7 Mon Sep 17 00:00:00 2001
From: Abhilash Jindal <klock.android@gmail.com>
Date: Tue, 11 Aug 2015 12:01:22 -0400
Subject: [PATCH 53/53] cpufreq: speedstep-lib: Use monotonic clock

Wall time obtained from do_gettimeofday is susceptible to sudden jumps due to
user setting the time or due to NTP.

Monotonic time is constantly increasing time better suited for comparing two
timestamps.

Signed-off-by: Abhilash Jindal <klock.android@gmail.com>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 drivers/cpufreq/speedstep-lib.c | 9 ++++-----
 1 file changed, 4 insertions(+), 5 deletions(-)

diff --git a/drivers/cpufreq/speedstep-lib.c b/drivers/cpufreq/speedstep-lib.c
index 4ab7a2156672..15d3214aaa00 100644
--- a/drivers/cpufreq/speedstep-lib.c
+++ b/drivers/cpufreq/speedstep-lib.c
@@ -386,7 +386,7 @@ unsigned int speedstep_get_freqs(enum speedstep_processor processor,
 	unsigned int prev_speed;
 	unsigned int ret = 0;
 	unsigned long flags;
-	struct timeval tv1, tv2;
+	ktime_t tv1, tv2;
 
 	if ((!processor) || (!low_speed) || (!high_speed) || (!set_state))
 		return -EINVAL;
@@ -415,14 +415,14 @@ unsigned int speedstep_get_freqs(enum speedstep_processor processor,
 
 	/* start latency measurement */
 	if (transition_latency)
-		do_gettimeofday(&tv1);
+		tv1 = ktime_get();
 
 	/* switch to high state */
 	set_state(SPEEDSTEP_HIGH);
 
 	/* end latency measurement */
 	if (transition_latency)
-		do_gettimeofday(&tv2);
+		tv2 = ktime_get();
 
 	*high_speed = speedstep_get_frequency(processor);
 	if (!*high_speed) {
@@ -442,8 +442,7 @@ unsigned int speedstep_get_freqs(enum speedstep_processor processor,
 		set_state(SPEEDSTEP_LOW);
 
 	if (transition_latency) {
-		*transition_latency = (tv2.tv_sec - tv1.tv_sec) * USEC_PER_SEC +
-			tv2.tv_usec - tv1.tv_usec;
+		*transition_latency = ktime_to_us(ktime_sub(tv2, tv1));
 		pr_debug("transition latency is %u uSec\n", *transition_latency);
 
 		/* convert uSec to nSec and add 20% for safety reasons */