diff --git a/MAINTAINERS b/MAINTAINERS index 38d3e4ed7208..56a92ec7d3f7 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -3463,6 +3463,7 @@ T: git git://git.kernel.org/pub/scm/linux/kernel/git/rafael/linux-pm.git T: git git://git.linaro.org/people/vireshk/linux.git (For ARM Updates) B: https://bugzilla.kernel.org F: Documentation/cpu-freq/ +F: Documentation/devicetree/bindings/cpufreq/ F: drivers/cpufreq/ F: include/linux/cpufreq.h F: tools/testing/selftests/cpufreq/ diff --git a/arch/arm/boot/dts/ste-dbx5x0.dtsi b/arch/arm/boot/dts/ste-dbx5x0.dtsi index 162e1eb5373d..6c5affe2d0f5 100644 --- a/arch/arm/boot/dts/ste-dbx5x0.dtsi +++ b/arch/arm/boot/dts/ste-dbx5x0.dtsi @@ -1189,11 +1189,6 @@ status = "disabled"; }; - cpufreq-cooling { - compatible = "stericsson,db8500-cpufreq-cooling"; - status = "disabled"; - }; - mcde@a0350000 { compatible = "stericsson,mcde"; reg = <0xa0350000 0x1000>, /* MCDE */ diff --git a/drivers/cpufreq/Kconfig.arm b/drivers/cpufreq/Kconfig.arm index 74fa5c5904d3..74ed7e9a7f27 100644 --- a/drivers/cpufreq/Kconfig.arm +++ b/drivers/cpufreq/Kconfig.arm @@ -247,6 +247,12 @@ config ARM_TEGRA124_CPUFREQ help This adds the CPUFreq driver support for Tegra124 SOCs. +config ARM_TEGRA186_CPUFREQ + tristate "Tegra186 CPUFreq support" + depends on ARCH_TEGRA && TEGRA_BPMP + help + This adds the CPUFreq driver support for Tegra186 SOCs. + config ARM_TI_CPUFREQ bool "Texas Instruments CPUFreq support" depends on ARCH_OMAP2PLUS diff --git a/drivers/cpufreq/Makefile b/drivers/cpufreq/Makefile index 9f5a8045f36d..b7e78f063c4f 100644 --- a/drivers/cpufreq/Makefile +++ b/drivers/cpufreq/Makefile @@ -77,6 +77,7 @@ obj-$(CONFIG_ARM_SPEAR_CPUFREQ) += spear-cpufreq.o obj-$(CONFIG_ARM_STI_CPUFREQ) += sti-cpufreq.o obj-$(CONFIG_ARM_TEGRA20_CPUFREQ) += tegra20-cpufreq.o obj-$(CONFIG_ARM_TEGRA124_CPUFREQ) += tegra124-cpufreq.o +obj-$(CONFIG_ARM_TEGRA186_CPUFREQ) += tegra186-cpufreq.o obj-$(CONFIG_ARM_TI_CPUFREQ) += ti-cpufreq.o obj-$(CONFIG_ARM_VEXPRESS_SPC_CPUFREQ) += vexpress-spc-cpufreq.o obj-$(CONFIG_ACPI_CPPC_CPUFREQ) += cppc_cpufreq.o diff --git a/drivers/cpufreq/dbx500-cpufreq.c b/drivers/cpufreq/dbx500-cpufreq.c index 5c3ec1dd4921..3575b82210ba 100644 --- a/drivers/cpufreq/dbx500-cpufreq.c +++ b/drivers/cpufreq/dbx500-cpufreq.c @@ -11,6 +11,7 @@ #include #include #include +#include #include #include #include @@ -18,6 +19,7 @@ static struct cpufreq_frequency_table *freq_table; static struct clk *armss_clk; +static struct thermal_cooling_device *cdev; static int dbx500_cpufreq_target(struct cpufreq_policy *policy, unsigned int index) @@ -32,6 +34,22 @@ static int dbx500_cpufreq_init(struct cpufreq_policy *policy) return cpufreq_generic_init(policy, freq_table, 20 * 1000); } +static int dbx500_cpufreq_exit(struct cpufreq_policy *policy) +{ + if (!IS_ERR(cdev)) + cpufreq_cooling_unregister(cdev); + return 0; +} + +static void dbx500_cpufreq_ready(struct cpufreq_policy *policy) +{ + cdev = cpufreq_cooling_register(policy->cpus); + if (IS_ERR(cdev)) + pr_err("Failed to register cooling device %ld\n", PTR_ERR(cdev)); + else + pr_info("Cooling device registered: %s\n", cdev->type); +} + static struct cpufreq_driver dbx500_cpufreq_driver = { .flags = CPUFREQ_STICKY | CPUFREQ_CONST_LOOPS | CPUFREQ_NEED_INITIAL_FREQ_CHECK, @@ -39,6 +57,8 @@ static struct cpufreq_driver dbx500_cpufreq_driver = { .target_index = dbx500_cpufreq_target, .get = cpufreq_generic_get, .init = dbx500_cpufreq_init, + .exit = dbx500_cpufreq_exit, + .ready = dbx500_cpufreq_ready, .name = "DBX500", .attr = cpufreq_generic_attr, }; diff --git a/drivers/cpufreq/imx6q-cpufreq.c b/drivers/cpufreq/imx6q-cpufreq.c index 7719b02e04f5..9c13f097fd8c 100644 --- a/drivers/cpufreq/imx6q-cpufreq.c +++ b/drivers/cpufreq/imx6q-cpufreq.c @@ -161,8 +161,13 @@ static int imx6q_set_target(struct cpufreq_policy *policy, unsigned int index) static int imx6q_cpufreq_init(struct cpufreq_policy *policy) { + int ret; + policy->clk = arm_clk; - return cpufreq_generic_init(policy, freq_table, transition_latency); + ret = cpufreq_generic_init(policy, freq_table, transition_latency); + policy->suspend_freq = policy->max; + + return ret; } static struct cpufreq_driver imx6q_cpufreq_driver = { @@ -173,6 +178,7 @@ static struct cpufreq_driver imx6q_cpufreq_driver = { .init = imx6q_cpufreq_init, .name = "imx6q-cpufreq", .attr = cpufreq_generic_attr, + .suspend = cpufreq_generic_suspend, }; static int imx6q_cpufreq_probe(struct platform_device *pdev) @@ -222,6 +228,13 @@ static int imx6q_cpufreq_probe(struct platform_device *pdev) arm_reg = regulator_get(cpu_dev, "arm"); pu_reg = regulator_get_optional(cpu_dev, "pu"); soc_reg = regulator_get(cpu_dev, "soc"); + if (PTR_ERR(arm_reg) == -EPROBE_DEFER || + PTR_ERR(soc_reg) == -EPROBE_DEFER || + PTR_ERR(pu_reg) == -EPROBE_DEFER) { + ret = -EPROBE_DEFER; + dev_dbg(cpu_dev, "regulators not ready, defer\n"); + goto put_reg; + } if (IS_ERR(arm_reg) || IS_ERR(soc_reg)) { dev_err(cpu_dev, "failed to get regulators\n"); ret = -ENOENT; @@ -255,7 +268,7 @@ static int imx6q_cpufreq_probe(struct platform_device *pdev) ret = dev_pm_opp_init_cpufreq_table(cpu_dev, &freq_table); if (ret) { dev_err(cpu_dev, "failed to init cpufreq table: %d\n", ret); - goto put_reg; + goto out_free_opp; } /* Make imx6_soc_volt array's size same as arm opp number */ diff --git a/drivers/cpufreq/intel_pstate.c b/drivers/cpufreq/intel_pstate.c index 283491f742d3..b7de5bd76a31 100644 --- a/drivers/cpufreq/intel_pstate.c +++ b/drivers/cpufreq/intel_pstate.c @@ -37,7 +37,11 @@ #include #include +#define INTEL_PSTATE_DEFAULT_SAMPLING_INTERVAL (10 * NSEC_PER_MSEC) +#define INTEL_PSTATE_HWP_SAMPLING_INTERVAL (50 * NSEC_PER_MSEC) + #define INTEL_CPUFREQ_TRANSITION_LATENCY 20000 +#define INTEL_CPUFREQ_TRANSITION_DELAY 500 #ifdef CONFIG_ACPI #include @@ -74,6 +78,11 @@ static inline int ceiling_fp(int32_t x) return ret; } +static inline int32_t percent_fp(int percent) +{ + return div_fp(percent, 100); +} + static inline u64 mul_ext_fp(u64 x, u64 y) { return (x * y) >> EXT_FRAC_BITS; @@ -186,45 +195,22 @@ struct _pid { }; /** - * struct perf_limits - Store user and policy limits - * @no_turbo: User requested turbo state from intel_pstate sysfs - * @turbo_disabled: Platform turbo status either from msr - * MSR_IA32_MISC_ENABLE or when maximum available pstate - * matches the maximum turbo pstate - * @max_perf_pct: Effective maximum performance limit in percentage, this - * is minimum of either limits enforced by cpufreq policy - * or limits from user set limits via intel_pstate sysfs - * @min_perf_pct: Effective minimum performance limit in percentage, this - * is maximum of either limits enforced by cpufreq policy - * or limits from user set limits via intel_pstate sysfs - * @max_perf: This is a scaled value between 0 to 255 for max_perf_pct - * This value is used to limit max pstate - * @min_perf: This is a scaled value between 0 to 255 for min_perf_pct - * This value is used to limit min pstate - * @max_policy_pct: The maximum performance in percentage enforced by - * cpufreq setpolicy interface - * @max_sysfs_pct: The maximum performance in percentage enforced by - * intel pstate sysfs interface, unused when per cpu - * controls are enforced - * @min_policy_pct: The minimum performance in percentage enforced by - * cpufreq setpolicy interface - * @min_sysfs_pct: The minimum performance in percentage enforced by - * intel pstate sysfs interface, unused when per cpu - * controls are enforced - * - * Storage for user and policy defined limits. + * struct global_params - Global parameters, mostly tunable via sysfs. + * @no_turbo: Whether or not to use turbo P-states. + * @turbo_disabled: Whethet or not turbo P-states are available at all, + * based on the MSR_IA32_MISC_ENABLE value and whether or + * not the maximum reported turbo P-state is different from + * the maximum reported non-turbo one. + * @min_perf_pct: Minimum capacity limit in percent of the maximum turbo + * P-state capacity. + * @max_perf_pct: Maximum capacity limit in percent of the maximum turbo + * P-state capacity. */ -struct perf_limits { - int no_turbo; - int turbo_disabled; +struct global_params { + bool no_turbo; + bool turbo_disabled; int max_perf_pct; int min_perf_pct; - int32_t max_perf; - int32_t min_perf; - int max_policy_pct; - int max_sysfs_pct; - int min_policy_pct; - int min_sysfs_pct; }; /** @@ -245,9 +231,10 @@ struct perf_limits { * @prev_cummulative_iowait: IO Wait time difference from last and * current sample * @sample: Storage for storing last Sample data - * @perf_limits: Pointer to perf_limit unique to this CPU - * Not all field in the structure are applicable - * when per cpu controls are enforced + * @min_perf: Minimum capacity limit as a fraction of the maximum + * turbo P-state capacity. + * @max_perf: Maximum capacity limit as a fraction of the maximum + * turbo P-state capacity. * @acpi_perf_data: Stores ACPI perf information read from _PSS * @valid_pss_table: Set to true for valid ACPI _PSS entries found * @epp_powersave: Last saved HWP energy performance preference @@ -279,7 +266,8 @@ struct cpudata { u64 prev_tsc; u64 prev_cummulative_iowait; struct sample sample; - struct perf_limits *perf_limits; + int32_t min_perf; + int32_t max_perf; #ifdef CONFIG_ACPI struct acpi_processor_performance acpi_perf_data; bool valid_pss_table; @@ -324,7 +312,7 @@ struct pstate_adjust_policy { * @get_scaling: Callback to get frequency scaling factor * @get_val: Callback to convert P state to actual MSR write value * @get_vid: Callback to get VID data for Atom platforms - * @get_target_pstate: Callback to a function to calculate next P state to use + * @update_util: Active mode utilization update callback. * * Core and Atom CPU models have different way to get P State limits. This * structure is used to store those callbacks. @@ -337,43 +325,31 @@ struct pstate_funcs { int (*get_scaling)(void); u64 (*get_val)(struct cpudata*, int pstate); void (*get_vid)(struct cpudata *); - int32_t (*get_target_pstate)(struct cpudata *); + void (*update_util)(struct update_util_data *data, u64 time, + unsigned int flags); }; -/** - * struct cpu_defaults- Per CPU model default config data - * @pid_policy: PID config data - * @funcs: Callback function data - */ -struct cpu_defaults { - struct pstate_adjust_policy pid_policy; - struct pstate_funcs funcs; -}; - -static inline int32_t get_target_pstate_use_performance(struct cpudata *cpu); -static inline int32_t get_target_pstate_use_cpu_load(struct cpudata *cpu); - -static struct pstate_adjust_policy pid_params __read_mostly; static struct pstate_funcs pstate_funcs __read_mostly; +static struct pstate_adjust_policy pid_params __read_mostly = { + .sample_rate_ms = 10, + .sample_rate_ns = 10 * NSEC_PER_MSEC, + .deadband = 0, + .setpoint = 97, + .p_gain_pct = 20, + .d_gain_pct = 0, + .i_gain_pct = 0, +}; + static int hwp_active __read_mostly; static bool per_cpu_limits __read_mostly; -static bool driver_registered __read_mostly; +static struct cpufreq_driver *intel_pstate_driver __read_mostly; #ifdef CONFIG_ACPI static bool acpi_ppc; #endif -static struct perf_limits global; - -static void intel_pstate_init_limits(struct perf_limits *limits) -{ - memset(limits, 0, sizeof(*limits)); - limits->max_perf_pct = 100; - limits->max_perf = int_ext_tofp(1); - limits->max_policy_pct = 100; - limits->max_sysfs_pct = 100; -} +static struct global_params global; static DEFINE_MUTEX(intel_pstate_driver_lock); static DEFINE_MUTEX(intel_pstate_limits_lock); @@ -530,29 +506,6 @@ static inline void intel_pstate_exit_perf_limits(struct cpufreq_policy *policy) } #endif -static inline void pid_reset(struct _pid *pid, int setpoint, int busy, - int deadband, int integral) { - pid->setpoint = int_tofp(setpoint); - pid->deadband = int_tofp(deadband); - pid->integral = int_tofp(integral); - pid->last_err = int_tofp(setpoint) - int_tofp(busy); -} - -static inline void pid_p_gain_set(struct _pid *pid, int percent) -{ - pid->p_gain = div_fp(percent, 100); -} - -static inline void pid_i_gain_set(struct _pid *pid, int percent) -{ - pid->i_gain = div_fp(percent, 100); -} - -static inline void pid_d_gain_set(struct _pid *pid, int percent) -{ - pid->d_gain = div_fp(percent, 100); -} - static signed int pid_calc(struct _pid *pid, int32_t busy) { signed int result; @@ -590,23 +543,17 @@ static signed int pid_calc(struct _pid *pid, int32_t busy) return (signed int)fp_toint(result); } -static inline void intel_pstate_busy_pid_reset(struct cpudata *cpu) +static inline void intel_pstate_pid_reset(struct cpudata *cpu) { - pid_p_gain_set(&cpu->pid, pid_params.p_gain_pct); - pid_d_gain_set(&cpu->pid, pid_params.d_gain_pct); - pid_i_gain_set(&cpu->pid, pid_params.i_gain_pct); + struct _pid *pid = &cpu->pid; - pid_reset(&cpu->pid, pid_params.setpoint, 100, pid_params.deadband, 0); -} - -static inline void intel_pstate_reset_all_pid(void) -{ - unsigned int cpu; - - for_each_online_cpu(cpu) { - if (all_cpu_data[cpu]) - intel_pstate_busy_pid_reset(all_cpu_data[cpu]); - } + pid->p_gain = percent_fp(pid_params.p_gain_pct); + pid->d_gain = percent_fp(pid_params.d_gain_pct); + pid->i_gain = percent_fp(pid_params.i_gain_pct); + pid->setpoint = int_tofp(pid_params.setpoint); + pid->last_err = pid->setpoint - int_tofp(100); + pid->deadband = int_tofp(pid_params.deadband); + pid->integral = 0; } static inline void update_turbo_state(void) @@ -621,6 +568,14 @@ static inline void update_turbo_state(void) cpu->pstate.max_pstate == cpu->pstate.turbo_pstate); } +static int min_perf_pct_min(void) +{ + struct cpudata *cpu = all_cpu_data[0]; + + return DIV_ROUND_UP(cpu->pstate.min_pstate * 100, + cpu->pstate.turbo_pstate); +} + static s16 intel_pstate_get_epb(struct cpudata *cpu_data) { u64 epb; @@ -838,96 +793,80 @@ static struct freq_attr *hwp_cpufreq_attrs[] = { NULL, }; -static void intel_pstate_hwp_set(struct cpufreq_policy *policy) +static void intel_pstate_hwp_set(unsigned int cpu) { - int min, hw_min, max, hw_max, cpu; - struct perf_limits *perf_limits = &global; + struct cpudata *cpu_data = all_cpu_data[cpu]; + int min, hw_min, max, hw_max; u64 value, cap; + s16 epp; - for_each_cpu(cpu, policy->cpus) { - struct cpudata *cpu_data = all_cpu_data[cpu]; - s16 epp; + rdmsrl_on_cpu(cpu, MSR_HWP_CAPABILITIES, &cap); + hw_min = HWP_LOWEST_PERF(cap); + if (global.no_turbo) + hw_max = HWP_GUARANTEED_PERF(cap); + else + hw_max = HWP_HIGHEST_PERF(cap); - if (per_cpu_limits) - perf_limits = all_cpu_data[cpu]->perf_limits; + max = fp_ext_toint(hw_max * cpu_data->max_perf); + if (cpu_data->policy == CPUFREQ_POLICY_PERFORMANCE) + min = max; + else + min = fp_ext_toint(hw_max * cpu_data->min_perf); - rdmsrl_on_cpu(cpu, MSR_HWP_CAPABILITIES, &cap); - hw_min = HWP_LOWEST_PERF(cap); - if (global.no_turbo) - hw_max = HWP_GUARANTEED_PERF(cap); - else - hw_max = HWP_HIGHEST_PERF(cap); + rdmsrl_on_cpu(cpu, MSR_HWP_REQUEST, &value); - max = fp_ext_toint(hw_max * perf_limits->max_perf); - if (cpu_data->policy == CPUFREQ_POLICY_PERFORMANCE) - min = max; - else - min = fp_ext_toint(hw_max * perf_limits->min_perf); + value &= ~HWP_MIN_PERF(~0L); + value |= HWP_MIN_PERF(min); - rdmsrl_on_cpu(cpu, MSR_HWP_REQUEST, &value); + value &= ~HWP_MAX_PERF(~0L); + value |= HWP_MAX_PERF(max); - value &= ~HWP_MIN_PERF(~0L); - value |= HWP_MIN_PERF(min); + if (cpu_data->epp_policy == cpu_data->policy) + goto skip_epp; - value &= ~HWP_MAX_PERF(~0L); - value |= HWP_MAX_PERF(max); + cpu_data->epp_policy = cpu_data->policy; - if (cpu_data->epp_policy == cpu_data->policy) + if (cpu_data->epp_saved >= 0) { + epp = cpu_data->epp_saved; + cpu_data->epp_saved = -EINVAL; + goto update_epp; + } + + if (cpu_data->policy == CPUFREQ_POLICY_PERFORMANCE) { + epp = intel_pstate_get_epp(cpu_data, value); + cpu_data->epp_powersave = epp; + /* If EPP read was failed, then don't try to write */ + if (epp < 0) goto skip_epp; - cpu_data->epp_policy = cpu_data->policy; + epp = 0; + } else { + /* skip setting EPP, when saved value is invalid */ + if (cpu_data->epp_powersave < 0) + goto skip_epp; - if (cpu_data->epp_saved >= 0) { - epp = cpu_data->epp_saved; - cpu_data->epp_saved = -EINVAL; - goto update_epp; - } + /* + * No need to restore EPP when it is not zero. This + * means: + * - Policy is not changed + * - user has manually changed + * - Error reading EPB + */ + epp = intel_pstate_get_epp(cpu_data, value); + if (epp) + goto skip_epp; - if (cpu_data->policy == CPUFREQ_POLICY_PERFORMANCE) { - epp = intel_pstate_get_epp(cpu_data, value); - cpu_data->epp_powersave = epp; - /* If EPP read was failed, then don't try to write */ - if (epp < 0) - goto skip_epp; - - - epp = 0; - } else { - /* skip setting EPP, when saved value is invalid */ - if (cpu_data->epp_powersave < 0) - goto skip_epp; - - /* - * No need to restore EPP when it is not zero. This - * means: - * - Policy is not changed - * - user has manually changed - * - Error reading EPB - */ - epp = intel_pstate_get_epp(cpu_data, value); - if (epp) - goto skip_epp; - - epp = cpu_data->epp_powersave; - } -update_epp: - if (static_cpu_has(X86_FEATURE_HWP_EPP)) { - value &= ~GENMASK_ULL(31, 24); - value |= (u64)epp << 24; - } else { - intel_pstate_set_epb(cpu, epp); - } -skip_epp: - wrmsrl_on_cpu(cpu, MSR_HWP_REQUEST, value); + epp = cpu_data->epp_powersave; } -} - -static int intel_pstate_hwp_set_policy(struct cpufreq_policy *policy) -{ - if (hwp_active) - intel_pstate_hwp_set(policy); - - return 0; +update_epp: + if (static_cpu_has(X86_FEATURE_HWP_EPP)) { + value &= ~GENMASK_ULL(31, 24); + value |= (u64)epp << 24; + } else { + intel_pstate_set_epb(cpu, epp); + } +skip_epp: + wrmsrl_on_cpu(cpu, MSR_HWP_REQUEST, value); } static int intel_pstate_hwp_save_state(struct cpufreq_policy *policy) @@ -944,20 +883,17 @@ static int intel_pstate_hwp_save_state(struct cpufreq_policy *policy) static int intel_pstate_resume(struct cpufreq_policy *policy) { - int ret; - if (!hwp_active) return 0; mutex_lock(&intel_pstate_limits_lock); all_cpu_data[policy->cpu]->epp_policy = 0; - - ret = intel_pstate_hwp_set_policy(policy); + intel_pstate_hwp_set(policy->cpu); mutex_unlock(&intel_pstate_limits_lock); - return ret; + return 0; } static void intel_pstate_update_policies(void) @@ -971,9 +907,14 @@ static void intel_pstate_update_policies(void) /************************** debugfs begin ************************/ static int pid_param_set(void *data, u64 val) { + unsigned int cpu; + *(u32 *)data = val; pid_params.sample_rate_ns = pid_params.sample_rate_ms * NSEC_PER_MSEC; - intel_pstate_reset_all_pid(); + for_each_possible_cpu(cpu) + if (all_cpu_data[cpu]) + intel_pstate_pid_reset(all_cpu_data[cpu]); + return 0; } @@ -1084,7 +1025,7 @@ static ssize_t show_turbo_pct(struct kobject *kobj, mutex_lock(&intel_pstate_driver_lock); - if (!driver_registered) { + if (!intel_pstate_driver) { mutex_unlock(&intel_pstate_driver_lock); return -EAGAIN; } @@ -1109,7 +1050,7 @@ static ssize_t show_num_pstates(struct kobject *kobj, mutex_lock(&intel_pstate_driver_lock); - if (!driver_registered) { + if (!intel_pstate_driver) { mutex_unlock(&intel_pstate_driver_lock); return -EAGAIN; } @@ -1129,7 +1070,7 @@ static ssize_t show_no_turbo(struct kobject *kobj, mutex_lock(&intel_pstate_driver_lock); - if (!driver_registered) { + if (!intel_pstate_driver) { mutex_unlock(&intel_pstate_driver_lock); return -EAGAIN; } @@ -1157,7 +1098,7 @@ static ssize_t store_no_turbo(struct kobject *a, struct attribute *b, mutex_lock(&intel_pstate_driver_lock); - if (!driver_registered) { + if (!intel_pstate_driver) { mutex_unlock(&intel_pstate_driver_lock); return -EAGAIN; } @@ -1174,6 +1115,15 @@ static ssize_t store_no_turbo(struct kobject *a, struct attribute *b, global.no_turbo = clamp_t(int, input, 0, 1); + if (global.no_turbo) { + struct cpudata *cpu = all_cpu_data[0]; + int pct = cpu->pstate.max_pstate * 100 / cpu->pstate.turbo_pstate; + + /* Squash the global minimum into the permitted range. */ + if (global.min_perf_pct > pct) + global.min_perf_pct = pct; + } + mutex_unlock(&intel_pstate_limits_lock); intel_pstate_update_policies(); @@ -1195,18 +1145,14 @@ static ssize_t store_max_perf_pct(struct kobject *a, struct attribute *b, mutex_lock(&intel_pstate_driver_lock); - if (!driver_registered) { + if (!intel_pstate_driver) { mutex_unlock(&intel_pstate_driver_lock); return -EAGAIN; } mutex_lock(&intel_pstate_limits_lock); - global.max_sysfs_pct = clamp_t(int, input, 0 , 100); - global.max_perf_pct = min(global.max_policy_pct, global.max_sysfs_pct); - global.max_perf_pct = max(global.min_policy_pct, global.max_perf_pct); - global.max_perf_pct = max(global.min_perf_pct, global.max_perf_pct); - global.max_perf = percent_ext_fp(global.max_perf_pct); + global.max_perf_pct = clamp_t(int, input, global.min_perf_pct, 100); mutex_unlock(&intel_pstate_limits_lock); @@ -1229,18 +1175,15 @@ static ssize_t store_min_perf_pct(struct kobject *a, struct attribute *b, mutex_lock(&intel_pstate_driver_lock); - if (!driver_registered) { + if (!intel_pstate_driver) { mutex_unlock(&intel_pstate_driver_lock); return -EAGAIN; } mutex_lock(&intel_pstate_limits_lock); - global.min_sysfs_pct = clamp_t(int, input, 0 , 100); - global.min_perf_pct = max(global.min_policy_pct, global.min_sysfs_pct); - global.min_perf_pct = min(global.max_policy_pct, global.min_perf_pct); - global.min_perf_pct = min(global.max_perf_pct, global.min_perf_pct); - global.min_perf = percent_ext_fp(global.min_perf_pct); + global.min_perf_pct = clamp_t(int, input, + min_perf_pct_min(), global.max_perf_pct); mutex_unlock(&intel_pstate_limits_lock); @@ -1554,132 +1497,10 @@ static int knl_get_turbo_pstate(void) return ret; } -static struct cpu_defaults core_params = { - .pid_policy = { - .sample_rate_ms = 10, - .deadband = 0, - .setpoint = 97, - .p_gain_pct = 20, - .d_gain_pct = 0, - .i_gain_pct = 0, - }, - .funcs = { - .get_max = core_get_max_pstate, - .get_max_physical = core_get_max_pstate_physical, - .get_min = core_get_min_pstate, - .get_turbo = core_get_turbo_pstate, - .get_scaling = core_get_scaling, - .get_val = core_get_val, - .get_target_pstate = get_target_pstate_use_performance, - }, -}; - -static const struct cpu_defaults silvermont_params = { - .pid_policy = { - .sample_rate_ms = 10, - .deadband = 0, - .setpoint = 60, - .p_gain_pct = 14, - .d_gain_pct = 0, - .i_gain_pct = 4, - }, - .funcs = { - .get_max = atom_get_max_pstate, - .get_max_physical = atom_get_max_pstate, - .get_min = atom_get_min_pstate, - .get_turbo = atom_get_turbo_pstate, - .get_val = atom_get_val, - .get_scaling = silvermont_get_scaling, - .get_vid = atom_get_vid, - .get_target_pstate = get_target_pstate_use_cpu_load, - }, -}; - -static const struct cpu_defaults airmont_params = { - .pid_policy = { - .sample_rate_ms = 10, - .deadband = 0, - .setpoint = 60, - .p_gain_pct = 14, - .d_gain_pct = 0, - .i_gain_pct = 4, - }, - .funcs = { - .get_max = atom_get_max_pstate, - .get_max_physical = atom_get_max_pstate, - .get_min = atom_get_min_pstate, - .get_turbo = atom_get_turbo_pstate, - .get_val = atom_get_val, - .get_scaling = airmont_get_scaling, - .get_vid = atom_get_vid, - .get_target_pstate = get_target_pstate_use_cpu_load, - }, -}; - -static const struct cpu_defaults knl_params = { - .pid_policy = { - .sample_rate_ms = 10, - .deadband = 0, - .setpoint = 97, - .p_gain_pct = 20, - .d_gain_pct = 0, - .i_gain_pct = 0, - }, - .funcs = { - .get_max = core_get_max_pstate, - .get_max_physical = core_get_max_pstate_physical, - .get_min = core_get_min_pstate, - .get_turbo = knl_get_turbo_pstate, - .get_scaling = core_get_scaling, - .get_val = core_get_val, - .get_target_pstate = get_target_pstate_use_performance, - }, -}; - -static const struct cpu_defaults bxt_params = { - .pid_policy = { - .sample_rate_ms = 10, - .deadband = 0, - .setpoint = 60, - .p_gain_pct = 14, - .d_gain_pct = 0, - .i_gain_pct = 4, - }, - .funcs = { - .get_max = core_get_max_pstate, - .get_max_physical = core_get_max_pstate_physical, - .get_min = core_get_min_pstate, - .get_turbo = core_get_turbo_pstate, - .get_scaling = core_get_scaling, - .get_val = core_get_val, - .get_target_pstate = get_target_pstate_use_cpu_load, - }, -}; - -static void intel_pstate_get_min_max(struct cpudata *cpu, int *min, int *max) +static int intel_pstate_get_base_pstate(struct cpudata *cpu) { - int max_perf = cpu->pstate.turbo_pstate; - int max_perf_adj; - int min_perf; - struct perf_limits *perf_limits = &global; - - if (global.no_turbo || global.turbo_disabled) - max_perf = cpu->pstate.max_pstate; - - if (per_cpu_limits) - perf_limits = cpu->perf_limits; - - /* - * performance can be limited by user through sysfs, by cpufreq - * policy, or by cpu specific default values determined through - * experimentation. - */ - max_perf_adj = fp_ext_toint(max_perf * perf_limits->max_perf); - *max = clamp_t(int, max_perf_adj, - cpu->pstate.min_pstate, cpu->pstate.turbo_pstate); - - min_perf = fp_ext_toint(max_perf * perf_limits->min_perf); - *min = clamp_t(int, min_perf, cpu->pstate.min_pstate, max_perf); + return global.no_turbo || global.turbo_disabled ? + cpu->pstate.max_pstate : cpu->pstate.turbo_pstate; } static void intel_pstate_set_pstate(struct cpudata *cpu, int pstate) @@ -1702,11 +1523,13 @@ static void intel_pstate_set_min_pstate(struct cpudata *cpu) static void intel_pstate_max_within_limits(struct cpudata *cpu) { - int min_pstate, max_pstate; + int pstate; update_turbo_state(); - intel_pstate_get_min_max(cpu, &min_pstate, &max_pstate); - intel_pstate_set_pstate(cpu, max_pstate); + pstate = intel_pstate_get_base_pstate(cpu); + pstate = max(cpu->pstate.min_pstate, + fp_ext_toint(pstate * cpu->max_perf)); + intel_pstate_set_pstate(cpu, pstate); } static void intel_pstate_get_cpu_pstates(struct cpudata *cpu) @@ -1767,7 +1590,11 @@ static inline bool intel_pstate_sample(struct cpudata *cpu, u64 time) * that sample.time will always be reset before setting the utilization * update hook and make the caller skip the sample then. */ - return !!cpu->last_sample_time; + if (cpu->last_sample_time) { + intel_pstate_calc_avg_perf(cpu); + return true; + } + return false; } static inline int32_t get_avg_frequency(struct cpudata *cpu) @@ -1788,6 +1615,9 @@ static inline int32_t get_target_pstate_use_cpu_load(struct cpudata *cpu) int32_t busy_frac, boost; int target, avg_pstate; + if (cpu->policy == CPUFREQ_POLICY_PERFORMANCE) + return cpu->pstate.turbo_pstate; + busy_frac = div_fp(sample->mperf, sample->tsc); boost = cpu->iowait_boost; @@ -1824,6 +1654,9 @@ static inline int32_t get_target_pstate_use_performance(struct cpudata *cpu) int32_t perf_scaled, max_pstate, current_pstate, sample_ratio; u64 duration_ns; + if (cpu->policy == CPUFREQ_POLICY_PERFORMANCE) + return cpu->pstate.turbo_pstate; + /* * perf_scaled is the ratio of the average P-state during the last * sampling period to the P-state requested last time (in percent). @@ -1858,11 +1691,13 @@ static inline int32_t get_target_pstate_use_performance(struct cpudata *cpu) static int intel_pstate_prepare_request(struct cpudata *cpu, int pstate) { - int max_perf, min_perf; + int max_pstate = intel_pstate_get_base_pstate(cpu); + int min_pstate; - intel_pstate_get_min_max(cpu, &min_perf, &max_perf); - pstate = clamp_t(int, pstate, min_perf, max_perf); - return pstate; + min_pstate = max(cpu->pstate.min_pstate, + fp_ext_toint(max_pstate * cpu->min_perf)); + max_pstate = max(min_pstate, fp_ext_toint(max_pstate * cpu->max_perf)); + return clamp_t(int, pstate, min_pstate, max_pstate); } static void intel_pstate_update_pstate(struct cpudata *cpu, int pstate) @@ -1874,16 +1709,11 @@ static void intel_pstate_update_pstate(struct cpudata *cpu, int pstate) wrmsrl(MSR_IA32_PERF_CTL, pstate_funcs.get_val(cpu, pstate)); } -static inline void intel_pstate_adjust_busy_pstate(struct cpudata *cpu) +static void intel_pstate_adjust_pstate(struct cpudata *cpu, int target_pstate) { - int from, target_pstate; + int from = cpu->pstate.current_pstate; struct sample *sample; - from = cpu->pstate.current_pstate; - - target_pstate = cpu->policy == CPUFREQ_POLICY_PERFORMANCE ? - cpu->pstate.turbo_pstate : pstate_funcs.get_target_pstate(cpu); - update_turbo_state(); target_pstate = intel_pstate_prepare_request(cpu, target_pstate); @@ -1902,76 +1732,155 @@ static inline void intel_pstate_adjust_busy_pstate(struct cpudata *cpu) fp_toint(cpu->iowait_boost * 100)); } +static void intel_pstate_update_util_hwp(struct update_util_data *data, + u64 time, unsigned int flags) +{ + struct cpudata *cpu = container_of(data, struct cpudata, update_util); + u64 delta_ns = time - cpu->sample.time; + + if ((s64)delta_ns >= INTEL_PSTATE_HWP_SAMPLING_INTERVAL) + intel_pstate_sample(cpu, time); +} + +static void intel_pstate_update_util_pid(struct update_util_data *data, + u64 time, unsigned int flags) +{ + struct cpudata *cpu = container_of(data, struct cpudata, update_util); + u64 delta_ns = time - cpu->sample.time; + + if ((s64)delta_ns < pid_params.sample_rate_ns) + return; + + if (intel_pstate_sample(cpu, time)) { + int target_pstate; + + target_pstate = get_target_pstate_use_performance(cpu); + intel_pstate_adjust_pstate(cpu, target_pstate); + } +} + static void intel_pstate_update_util(struct update_util_data *data, u64 time, unsigned int flags) { struct cpudata *cpu = container_of(data, struct cpudata, update_util); u64 delta_ns; - if (pstate_funcs.get_target_pstate == get_target_pstate_use_cpu_load) { - if (flags & SCHED_CPUFREQ_IOWAIT) { - cpu->iowait_boost = int_tofp(1); - } else if (cpu->iowait_boost) { - /* Clear iowait_boost if the CPU may have been idle. */ - delta_ns = time - cpu->last_update; - if (delta_ns > TICK_NSEC) - cpu->iowait_boost = 0; - } - cpu->last_update = time; + if (flags & SCHED_CPUFREQ_IOWAIT) { + cpu->iowait_boost = int_tofp(1); + } else if (cpu->iowait_boost) { + /* Clear iowait_boost if the CPU may have been idle. */ + delta_ns = time - cpu->last_update; + if (delta_ns > TICK_NSEC) + cpu->iowait_boost = 0; } - + cpu->last_update = time; delta_ns = time - cpu->sample.time; - if ((s64)delta_ns >= pid_params.sample_rate_ns) { - bool sample_taken = intel_pstate_sample(cpu, time); + if ((s64)delta_ns < INTEL_PSTATE_DEFAULT_SAMPLING_INTERVAL) + return; - if (sample_taken) { - intel_pstate_calc_avg_perf(cpu); - if (!hwp_active) - intel_pstate_adjust_busy_pstate(cpu); - } + if (intel_pstate_sample(cpu, time)) { + int target_pstate; + + target_pstate = get_target_pstate_use_cpu_load(cpu); + intel_pstate_adjust_pstate(cpu, target_pstate); } } +static struct pstate_funcs core_funcs = { + .get_max = core_get_max_pstate, + .get_max_physical = core_get_max_pstate_physical, + .get_min = core_get_min_pstate, + .get_turbo = core_get_turbo_pstate, + .get_scaling = core_get_scaling, + .get_val = core_get_val, + .update_util = intel_pstate_update_util_pid, +}; + +static const struct pstate_funcs silvermont_funcs = { + .get_max = atom_get_max_pstate, + .get_max_physical = atom_get_max_pstate, + .get_min = atom_get_min_pstate, + .get_turbo = atom_get_turbo_pstate, + .get_val = atom_get_val, + .get_scaling = silvermont_get_scaling, + .get_vid = atom_get_vid, + .update_util = intel_pstate_update_util, +}; + +static const struct pstate_funcs airmont_funcs = { + .get_max = atom_get_max_pstate, + .get_max_physical = atom_get_max_pstate, + .get_min = atom_get_min_pstate, + .get_turbo = atom_get_turbo_pstate, + .get_val = atom_get_val, + .get_scaling = airmont_get_scaling, + .get_vid = atom_get_vid, + .update_util = intel_pstate_update_util, +}; + +static const struct pstate_funcs knl_funcs = { + .get_max = core_get_max_pstate, + .get_max_physical = core_get_max_pstate_physical, + .get_min = core_get_min_pstate, + .get_turbo = knl_get_turbo_pstate, + .get_scaling = core_get_scaling, + .get_val = core_get_val, + .update_util = intel_pstate_update_util_pid, +}; + +static const struct pstate_funcs bxt_funcs = { + .get_max = core_get_max_pstate, + .get_max_physical = core_get_max_pstate_physical, + .get_min = core_get_min_pstate, + .get_turbo = core_get_turbo_pstate, + .get_scaling = core_get_scaling, + .get_val = core_get_val, + .update_util = intel_pstate_update_util, +}; + #define ICPU(model, policy) \ { X86_VENDOR_INTEL, 6, model, X86_FEATURE_APERFMPERF,\ (unsigned long)&policy } static const struct x86_cpu_id intel_pstate_cpu_ids[] = { - ICPU(INTEL_FAM6_SANDYBRIDGE, core_params), - ICPU(INTEL_FAM6_SANDYBRIDGE_X, core_params), - ICPU(INTEL_FAM6_ATOM_SILVERMONT1, silvermont_params), - ICPU(INTEL_FAM6_IVYBRIDGE, core_params), - ICPU(INTEL_FAM6_HASWELL_CORE, core_params), - ICPU(INTEL_FAM6_BROADWELL_CORE, core_params), - ICPU(INTEL_FAM6_IVYBRIDGE_X, core_params), - ICPU(INTEL_FAM6_HASWELL_X, core_params), - ICPU(INTEL_FAM6_HASWELL_ULT, core_params), - ICPU(INTEL_FAM6_HASWELL_GT3E, core_params), - ICPU(INTEL_FAM6_BROADWELL_GT3E, core_params), - ICPU(INTEL_FAM6_ATOM_AIRMONT, airmont_params), - ICPU(INTEL_FAM6_SKYLAKE_MOBILE, core_params), - ICPU(INTEL_FAM6_BROADWELL_X, core_params), - ICPU(INTEL_FAM6_SKYLAKE_DESKTOP, core_params), - ICPU(INTEL_FAM6_BROADWELL_XEON_D, core_params), - ICPU(INTEL_FAM6_XEON_PHI_KNL, knl_params), - ICPU(INTEL_FAM6_XEON_PHI_KNM, knl_params), - ICPU(INTEL_FAM6_ATOM_GOLDMONT, bxt_params), + ICPU(INTEL_FAM6_SANDYBRIDGE, core_funcs), + ICPU(INTEL_FAM6_SANDYBRIDGE_X, core_funcs), + ICPU(INTEL_FAM6_ATOM_SILVERMONT1, silvermont_funcs), + ICPU(INTEL_FAM6_IVYBRIDGE, core_funcs), + ICPU(INTEL_FAM6_HASWELL_CORE, core_funcs), + ICPU(INTEL_FAM6_BROADWELL_CORE, core_funcs), + ICPU(INTEL_FAM6_IVYBRIDGE_X, core_funcs), + ICPU(INTEL_FAM6_HASWELL_X, core_funcs), + ICPU(INTEL_FAM6_HASWELL_ULT, core_funcs), + ICPU(INTEL_FAM6_HASWELL_GT3E, core_funcs), + ICPU(INTEL_FAM6_BROADWELL_GT3E, core_funcs), + ICPU(INTEL_FAM6_ATOM_AIRMONT, airmont_funcs), + ICPU(INTEL_FAM6_SKYLAKE_MOBILE, core_funcs), + ICPU(INTEL_FAM6_BROADWELL_X, core_funcs), + ICPU(INTEL_FAM6_SKYLAKE_DESKTOP, core_funcs), + ICPU(INTEL_FAM6_BROADWELL_XEON_D, core_funcs), + ICPU(INTEL_FAM6_XEON_PHI_KNL, knl_funcs), + ICPU(INTEL_FAM6_XEON_PHI_KNM, knl_funcs), + ICPU(INTEL_FAM6_ATOM_GOLDMONT, bxt_funcs), + ICPU(INTEL_FAM6_ATOM_GEMINI_LAKE, bxt_funcs), {} }; MODULE_DEVICE_TABLE(x86cpu, intel_pstate_cpu_ids); static const struct x86_cpu_id intel_pstate_cpu_oob_ids[] __initconst = { - ICPU(INTEL_FAM6_BROADWELL_XEON_D, core_params), - ICPU(INTEL_FAM6_BROADWELL_X, core_params), - ICPU(INTEL_FAM6_SKYLAKE_X, core_params), + ICPU(INTEL_FAM6_BROADWELL_XEON_D, core_funcs), + ICPU(INTEL_FAM6_BROADWELL_X, core_funcs), + ICPU(INTEL_FAM6_SKYLAKE_X, core_funcs), {} }; static const struct x86_cpu_id intel_pstate_cpu_ee_disable_ids[] = { - ICPU(INTEL_FAM6_KABYLAKE_DESKTOP, core_params), + ICPU(INTEL_FAM6_KABYLAKE_DESKTOP, core_funcs), {} }; +static bool pid_in_use(void); + static int intel_pstate_init_cpu(unsigned int cpunum) { struct cpudata *cpu; @@ -1979,18 +1888,11 @@ static int intel_pstate_init_cpu(unsigned int cpunum) cpu = all_cpu_data[cpunum]; if (!cpu) { - unsigned int size = sizeof(struct cpudata); - - if (per_cpu_limits) - size += sizeof(struct perf_limits); - - cpu = kzalloc(size, GFP_KERNEL); + cpu = kzalloc(sizeof(*cpu), GFP_KERNEL); if (!cpu) return -ENOMEM; all_cpu_data[cpunum] = cpu; - if (per_cpu_limits) - cpu->perf_limits = (struct perf_limits *)(cpu + 1); cpu->epp_default = -EINVAL; cpu->epp_powersave = -EINVAL; @@ -2009,14 +1911,12 @@ static int intel_pstate_init_cpu(unsigned int cpunum) intel_pstate_disable_ee(cpunum); intel_pstate_hwp_enable(cpu); - pid_params.sample_rate_ms = 50; - pid_params.sample_rate_ns = 50 * NSEC_PER_MSEC; + } else if (pid_in_use()) { + intel_pstate_pid_reset(cpu); } intel_pstate_get_cpu_pstates(cpu); - intel_pstate_busy_pid_reset(cpu); - pr_debug("controlling: cpu %d\n", cpunum); return 0; @@ -2039,7 +1939,7 @@ static void intel_pstate_set_update_util_hook(unsigned int cpu_num) /* Prevent intel_pstate_update_util() from using stale data. */ cpu->sample.time = 0; cpufreq_add_update_util_hook(cpu_num, &cpu->update_util, - intel_pstate_update_util); + pstate_funcs.update_util); cpu->update_util_set = true; } @@ -2055,46 +1955,68 @@ static void intel_pstate_clear_update_util_hook(unsigned int cpu) synchronize_sched(); } -static void intel_pstate_update_perf_limits(struct cpufreq_policy *policy, - struct perf_limits *limits) +static int intel_pstate_get_max_freq(struct cpudata *cpu) { + return global.turbo_disabled || global.no_turbo ? + cpu->pstate.max_freq : cpu->pstate.turbo_freq; +} + +static void intel_pstate_update_perf_limits(struct cpufreq_policy *policy, + struct cpudata *cpu) +{ + int max_freq = intel_pstate_get_max_freq(cpu); int32_t max_policy_perf, min_policy_perf; - max_policy_perf = div_ext_fp(policy->max, policy->cpuinfo.max_freq); + max_policy_perf = div_ext_fp(policy->max, max_freq); max_policy_perf = clamp_t(int32_t, max_policy_perf, 0, int_ext_tofp(1)); if (policy->max == policy->min) { min_policy_perf = max_policy_perf; } else { - min_policy_perf = div_ext_fp(policy->min, - policy->cpuinfo.max_freq); + min_policy_perf = div_ext_fp(policy->min, max_freq); min_policy_perf = clamp_t(int32_t, min_policy_perf, 0, max_policy_perf); } /* Normalize user input to [min_perf, max_perf] */ - limits->min_perf = max(min_policy_perf, - percent_ext_fp(limits->min_sysfs_pct)); - limits->min_perf = min(limits->min_perf, max_policy_perf); - limits->max_perf = min(max_policy_perf, - percent_ext_fp(limits->max_sysfs_pct)); - limits->max_perf = max(min_policy_perf, limits->max_perf); + if (per_cpu_limits) { + cpu->min_perf = min_policy_perf; + cpu->max_perf = max_policy_perf; + } else { + int32_t global_min, global_max; - /* Make sure min_perf <= max_perf */ - limits->min_perf = min(limits->min_perf, limits->max_perf); + /* Global limits are in percent of the maximum turbo P-state. */ + global_max = percent_ext_fp(global.max_perf_pct); + global_min = percent_ext_fp(global.min_perf_pct); + if (max_freq != cpu->pstate.turbo_freq) { + int32_t turbo_factor; - limits->max_perf = round_up(limits->max_perf, EXT_FRAC_BITS); - limits->min_perf = round_up(limits->min_perf, EXT_FRAC_BITS); - limits->max_perf_pct = fp_ext_toint(limits->max_perf * 100); - limits->min_perf_pct = fp_ext_toint(limits->min_perf * 100); + turbo_factor = div_ext_fp(cpu->pstate.turbo_pstate, + cpu->pstate.max_pstate); + global_min = mul_ext_fp(global_min, turbo_factor); + global_max = mul_ext_fp(global_max, turbo_factor); + } + global_min = clamp_t(int32_t, global_min, 0, global_max); + + cpu->min_perf = max(min_policy_perf, global_min); + cpu->min_perf = min(cpu->min_perf, max_policy_perf); + cpu->max_perf = min(max_policy_perf, global_max); + cpu->max_perf = max(min_policy_perf, cpu->max_perf); + + /* Make sure min_perf <= max_perf */ + cpu->min_perf = min(cpu->min_perf, cpu->max_perf); + } + + cpu->max_perf = round_up(cpu->max_perf, EXT_FRAC_BITS); + cpu->min_perf = round_up(cpu->min_perf, EXT_FRAC_BITS); pr_debug("cpu:%d max_perf_pct:%d min_perf_pct:%d\n", policy->cpu, - limits->max_perf_pct, limits->min_perf_pct); + fp_ext_toint(cpu->max_perf * 100), + fp_ext_toint(cpu->min_perf * 100)); } static int intel_pstate_set_policy(struct cpufreq_policy *policy) { struct cpudata *cpu; - struct perf_limits *perf_limits = &global; if (!policy->cpuinfo.max_freq) return -ENODEV; @@ -2105,19 +2027,9 @@ static int intel_pstate_set_policy(struct cpufreq_policy *policy) cpu = all_cpu_data[policy->cpu]; cpu->policy = policy->policy; - if (cpu->pstate.max_pstate_physical > cpu->pstate.max_pstate && - policy->max < policy->cpuinfo.max_freq && - policy->max > cpu->pstate.max_pstate * cpu->pstate.scaling) { - pr_debug("policy->max > max non turbo frequency\n"); - policy->max = policy->cpuinfo.max_freq; - } - - if (per_cpu_limits) - perf_limits = cpu->perf_limits; - mutex_lock(&intel_pstate_limits_lock); - intel_pstate_update_perf_limits(policy, perf_limits); + intel_pstate_update_perf_limits(policy, cpu); if (cpu->policy == CPUFREQ_POLICY_PERFORMANCE) { /* @@ -2130,38 +2042,38 @@ static int intel_pstate_set_policy(struct cpufreq_policy *policy) intel_pstate_set_update_util_hook(policy->cpu); - intel_pstate_hwp_set_policy(policy); + if (hwp_active) + intel_pstate_hwp_set(policy->cpu); mutex_unlock(&intel_pstate_limits_lock); return 0; } +static void intel_pstate_adjust_policy_max(struct cpufreq_policy *policy, + struct cpudata *cpu) +{ + if (cpu->pstate.max_pstate_physical > cpu->pstate.max_pstate && + policy->max < policy->cpuinfo.max_freq && + policy->max > cpu->pstate.max_freq) { + pr_debug("policy->max > max non turbo frequency\n"); + policy->max = policy->cpuinfo.max_freq; + } +} + static int intel_pstate_verify_policy(struct cpufreq_policy *policy) { struct cpudata *cpu = all_cpu_data[policy->cpu]; update_turbo_state(); - policy->cpuinfo.max_freq = global.turbo_disabled || global.no_turbo ? - cpu->pstate.max_freq : - cpu->pstate.turbo_freq; - - cpufreq_verify_within_cpu_limits(policy); + cpufreq_verify_within_limits(policy, policy->cpuinfo.min_freq, + intel_pstate_get_max_freq(cpu)); if (policy->policy != CPUFREQ_POLICY_POWERSAVE && policy->policy != CPUFREQ_POLICY_PERFORMANCE) return -EINVAL; - /* When per-CPU limits are used, sysfs limits are not used */ - if (!per_cpu_limits) { - unsigned int max_freq, min_freq; - - max_freq = policy->cpuinfo.max_freq * - global.max_sysfs_pct / 100; - min_freq = policy->cpuinfo.max_freq * - global.min_sysfs_pct / 100; - cpufreq_verify_within_limits(policy, min_freq, max_freq); - } + intel_pstate_adjust_policy_max(policy, cpu); return 0; } @@ -2202,8 +2114,8 @@ static int __intel_pstate_cpu_init(struct cpufreq_policy *policy) cpu = all_cpu_data[policy->cpu]; - if (per_cpu_limits) - intel_pstate_init_limits(cpu->perf_limits); + cpu->max_perf = int_ext_tofp(1); + cpu->min_perf = 0; policy->min = cpu->pstate.min_pstate * cpu->pstate.scaling; policy->max = cpu->pstate.turbo_pstate * cpu->pstate.scaling; @@ -2257,10 +2169,12 @@ static int intel_cpufreq_verify_policy(struct cpufreq_policy *policy) struct cpudata *cpu = all_cpu_data[policy->cpu]; update_turbo_state(); - policy->cpuinfo.max_freq = global.no_turbo || global.turbo_disabled ? - cpu->pstate.max_freq : cpu->pstate.turbo_freq; + cpufreq_verify_within_limits(policy, policy->cpuinfo.min_freq, + intel_pstate_get_max_freq(cpu)); - cpufreq_verify_within_cpu_limits(policy); + intel_pstate_adjust_policy_max(policy, cpu); + + intel_pstate_update_perf_limits(policy, cpu); return 0; } @@ -2324,6 +2238,7 @@ static int intel_cpufreq_cpu_init(struct cpufreq_policy *policy) return ret; policy->cpuinfo.transition_latency = INTEL_CPUFREQ_TRANSITION_LATENCY; + policy->transition_delay_us = INTEL_CPUFREQ_TRANSITION_DELAY; /* This reflects the intel_pstate_get_cpu_pstates() setting. */ policy->cur = policy->cpuinfo.min_freq; @@ -2341,7 +2256,13 @@ static struct cpufreq_driver intel_cpufreq = { .name = "intel_cpufreq", }; -static struct cpufreq_driver *intel_pstate_driver = &intel_pstate; +static struct cpufreq_driver *default_driver = &intel_pstate; + +static bool pid_in_use(void) +{ + return intel_pstate_driver == &intel_pstate && + pstate_funcs.update_util == intel_pstate_update_util_pid; +} static void intel_pstate_driver_cleanup(void) { @@ -2358,26 +2279,26 @@ static void intel_pstate_driver_cleanup(void) } } put_online_cpus(); + intel_pstate_driver = NULL; } -static int intel_pstate_register_driver(void) +static int intel_pstate_register_driver(struct cpufreq_driver *driver) { int ret; - intel_pstate_init_limits(&global); + memset(&global, 0, sizeof(global)); + global.max_perf_pct = 100; + intel_pstate_driver = driver; ret = cpufreq_register_driver(intel_pstate_driver); if (ret) { intel_pstate_driver_cleanup(); return ret; } - mutex_lock(&intel_pstate_limits_lock); - driver_registered = true; - mutex_unlock(&intel_pstate_limits_lock); + global.min_perf_pct = min_perf_pct_min(); - if (intel_pstate_driver == &intel_pstate && !hwp_active && - pstate_funcs.get_target_pstate != get_target_pstate_use_cpu_load) + if (pid_in_use()) intel_pstate_debug_expose_params(); return 0; @@ -2388,14 +2309,9 @@ static int intel_pstate_unregister_driver(void) if (hwp_active) return -EBUSY; - if (intel_pstate_driver == &intel_pstate && !hwp_active && - pstate_funcs.get_target_pstate != get_target_pstate_use_cpu_load) + if (pid_in_use()) intel_pstate_debug_hide_params(); - mutex_lock(&intel_pstate_limits_lock); - driver_registered = false; - mutex_unlock(&intel_pstate_limits_lock); - cpufreq_unregister_driver(intel_pstate_driver); intel_pstate_driver_cleanup(); @@ -2404,7 +2320,7 @@ static int intel_pstate_unregister_driver(void) static ssize_t intel_pstate_show_status(char *buf) { - if (!driver_registered) + if (!intel_pstate_driver) return sprintf(buf, "off\n"); return sprintf(buf, "%s\n", intel_pstate_driver == &intel_pstate ? @@ -2416,11 +2332,11 @@ static int intel_pstate_update_status(const char *buf, size_t size) int ret; if (size == 3 && !strncmp(buf, "off", size)) - return driver_registered ? + return intel_pstate_driver ? intel_pstate_unregister_driver() : -EINVAL; if (size == 6 && !strncmp(buf, "active", size)) { - if (driver_registered) { + if (intel_pstate_driver) { if (intel_pstate_driver == &intel_pstate) return 0; @@ -2429,13 +2345,12 @@ static int intel_pstate_update_status(const char *buf, size_t size) return ret; } - intel_pstate_driver = &intel_pstate; - return intel_pstate_register_driver(); + return intel_pstate_register_driver(&intel_pstate); } if (size == 7 && !strncmp(buf, "passive", size)) { - if (driver_registered) { - if (intel_pstate_driver != &intel_pstate) + if (intel_pstate_driver) { + if (intel_pstate_driver == &intel_cpufreq) return 0; ret = intel_pstate_unregister_driver(); @@ -2443,8 +2358,7 @@ static int intel_pstate_update_status(const char *buf, size_t size) return ret; } - intel_pstate_driver = &intel_cpufreq; - return intel_pstate_register_driver(); + return intel_pstate_register_driver(&intel_cpufreq); } return -EINVAL; @@ -2465,23 +2379,17 @@ static int __init intel_pstate_msrs_not_valid(void) return 0; } -static void __init copy_pid_params(struct pstate_adjust_policy *policy) -{ - pid_params.sample_rate_ms = policy->sample_rate_ms; - pid_params.sample_rate_ns = pid_params.sample_rate_ms * NSEC_PER_MSEC; - pid_params.p_gain_pct = policy->p_gain_pct; - pid_params.i_gain_pct = policy->i_gain_pct; - pid_params.d_gain_pct = policy->d_gain_pct; - pid_params.deadband = policy->deadband; - pid_params.setpoint = policy->setpoint; -} - #ifdef CONFIG_ACPI static void intel_pstate_use_acpi_profile(void) { - if (acpi_gbl_FADT.preferred_profile == PM_MOBILE) - pstate_funcs.get_target_pstate = - get_target_pstate_use_cpu_load; + switch (acpi_gbl_FADT.preferred_profile) { + case PM_MOBILE: + case PM_TABLET: + case PM_APPLIANCE_PC: + case PM_DESKTOP: + case PM_WORKSTATION: + pstate_funcs.update_util = intel_pstate_update_util; + } } #else static void intel_pstate_use_acpi_profile(void) @@ -2498,7 +2406,7 @@ static void __init copy_cpu_funcs(struct pstate_funcs *funcs) pstate_funcs.get_scaling = funcs->get_scaling; pstate_funcs.get_val = funcs->get_val; pstate_funcs.get_vid = funcs->get_vid; - pstate_funcs.get_target_pstate = funcs->get_target_pstate; + pstate_funcs.update_util = funcs->update_util; intel_pstate_use_acpi_profile(); } @@ -2637,29 +2545,31 @@ static const struct x86_cpu_id hwp_support_ids[] __initconst = { static int __init intel_pstate_init(void) { - const struct x86_cpu_id *id; - struct cpu_defaults *cpu_def; - int rc = 0; + int rc; if (no_load) return -ENODEV; - if (x86_match_cpu(hwp_support_ids) && !no_hwp) { - copy_cpu_funcs(&core_params.funcs); - hwp_active++; - intel_pstate.attr = hwp_cpufreq_attrs; - goto hwp_cpu_matched; + if (x86_match_cpu(hwp_support_ids)) { + copy_cpu_funcs(&core_funcs); + if (no_hwp) { + pstate_funcs.update_util = intel_pstate_update_util; + } else { + hwp_active++; + intel_pstate.attr = hwp_cpufreq_attrs; + pstate_funcs.update_util = intel_pstate_update_util_hwp; + goto hwp_cpu_matched; + } + } else { + const struct x86_cpu_id *id; + + id = x86_match_cpu(intel_pstate_cpu_ids); + if (!id) + return -ENODEV; + + copy_cpu_funcs((struct pstate_funcs *)id->driver_data); } - id = x86_match_cpu(intel_pstate_cpu_ids); - if (!id) - return -ENODEV; - - cpu_def = (struct cpu_defaults *)id->driver_data; - - copy_pid_params(&cpu_def->pid_policy); - copy_cpu_funcs(&cpu_def->funcs); - if (intel_pstate_msrs_not_valid()) return -ENODEV; @@ -2685,7 +2595,7 @@ hwp_cpu_matched: intel_pstate_sysfs_expose_params(); mutex_lock(&intel_pstate_driver_lock); - rc = intel_pstate_register_driver(); + rc = intel_pstate_register_driver(default_driver); mutex_unlock(&intel_pstate_driver_lock); if (rc) return rc; @@ -2706,7 +2616,7 @@ static int __init intel_pstate_setup(char *str) no_load = 1; } else if (!strcmp(str, "passive")) { pr_info("Passive mode enabled\n"); - intel_pstate_driver = &intel_cpufreq; + default_driver = &intel_cpufreq; no_hwp = 1; } if (!strcmp(str, "no_hwp")) { diff --git a/drivers/cpufreq/mt8173-cpufreq.c b/drivers/cpufreq/mt8173-cpufreq.c index ab25b1235a5e..fd1886faf33a 100644 --- a/drivers/cpufreq/mt8173-cpufreq.c +++ b/drivers/cpufreq/mt8173-cpufreq.c @@ -573,14 +573,33 @@ static struct platform_driver mt8173_cpufreq_platdrv = { .probe = mt8173_cpufreq_probe, }; -static int mt8173_cpufreq_driver_init(void) +/* List of machines supported by this driver */ +static const struct of_device_id mt8173_cpufreq_machines[] __initconst = { + { .compatible = "mediatek,mt817x", }, + { .compatible = "mediatek,mt8173", }, + { .compatible = "mediatek,mt8176", }, + + { } +}; + +static int __init mt8173_cpufreq_driver_init(void) { + struct device_node *np; + const struct of_device_id *match; struct platform_device *pdev; int err; - if (!of_machine_is_compatible("mediatek,mt8173")) + np = of_find_node_by_path("/"); + if (!np) return -ENODEV; + match = of_match_node(mt8173_cpufreq_machines, np); + of_node_put(np); + if (!match) { + pr_warn("Machine is not compatible with mt8173-cpufreq\n"); + return -ENODEV; + } + err = platform_driver_register(&mt8173_cpufreq_platdrv); if (err) return err; diff --git a/drivers/cpufreq/qoriq-cpufreq.c b/drivers/cpufreq/qoriq-cpufreq.c index bfec1bcd3835..e2ea433a5f9c 100644 --- a/drivers/cpufreq/qoriq-cpufreq.c +++ b/drivers/cpufreq/qoriq-cpufreq.c @@ -52,17 +52,27 @@ static u32 get_bus_freq(void) { struct device_node *soc; u32 sysfreq; + struct clk *pltclk; + int ret; + /* get platform freq by searching bus-frequency property */ soc = of_find_node_by_type(NULL, "soc"); - if (!soc) - return 0; + if (soc) { + ret = of_property_read_u32(soc, "bus-frequency", &sysfreq); + of_node_put(soc); + if (!ret) + return sysfreq; + } - if (of_property_read_u32(soc, "bus-frequency", &sysfreq)) - sysfreq = 0; + /* get platform freq by its clock name */ + pltclk = clk_get(NULL, "cg-pll0-div1"); + if (IS_ERR(pltclk)) { + pr_err("%s: can't get bus frequency %ld\n", + __func__, PTR_ERR(pltclk)); + return PTR_ERR(pltclk); + } - of_node_put(soc); - - return sysfreq; + return clk_get_rate(pltclk); } static struct clk *cpu_to_clk(int cpu) diff --git a/drivers/cpufreq/tegra186-cpufreq.c b/drivers/cpufreq/tegra186-cpufreq.c new file mode 100644 index 000000000000..fe7875311d62 --- /dev/null +++ b/drivers/cpufreq/tegra186-cpufreq.c @@ -0,0 +1,275 @@ +/* + * Copyright (c) 2017, NVIDIA CORPORATION. All rights reserved + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + */ + +#include +#include +#include +#include +#include + +#include +#include + +#define EDVD_CORE_VOLT_FREQ(core) (0x20 + (core) * 0x4) +#define EDVD_CORE_VOLT_FREQ_F_SHIFT 0 +#define EDVD_CORE_VOLT_FREQ_V_SHIFT 16 + +struct tegra186_cpufreq_cluster_info { + unsigned long offset; + int cpus[4]; + unsigned int bpmp_cluster_id; +}; + +#define NO_CPU -1 +static const struct tegra186_cpufreq_cluster_info tegra186_clusters[] = { + /* Denver cluster */ + { + .offset = SZ_64K * 7, + .cpus = { 1, 2, NO_CPU, NO_CPU }, + .bpmp_cluster_id = 0, + }, + /* A57 cluster */ + { + .offset = SZ_64K * 6, + .cpus = { 0, 3, 4, 5 }, + .bpmp_cluster_id = 1, + }, +}; + +struct tegra186_cpufreq_cluster { + const struct tegra186_cpufreq_cluster_info *info; + struct cpufreq_frequency_table *table; +}; + +struct tegra186_cpufreq_data { + void __iomem *regs; + + size_t num_clusters; + struct tegra186_cpufreq_cluster *clusters; +}; + +static int tegra186_cpufreq_init(struct cpufreq_policy *policy) +{ + struct tegra186_cpufreq_data *data = cpufreq_get_driver_data(); + unsigned int i; + + for (i = 0; i < data->num_clusters; i++) { + struct tegra186_cpufreq_cluster *cluster = &data->clusters[i]; + const struct tegra186_cpufreq_cluster_info *info = + cluster->info; + int core; + + for (core = 0; core < ARRAY_SIZE(info->cpus); core++) { + if (info->cpus[core] == policy->cpu) + break; + } + if (core == ARRAY_SIZE(info->cpus)) + continue; + + policy->driver_data = + data->regs + info->offset + EDVD_CORE_VOLT_FREQ(core); + cpufreq_table_validate_and_show(policy, cluster->table); + } + + policy->cpuinfo.transition_latency = 300 * 1000; + + return 0; +} + +static int tegra186_cpufreq_set_target(struct cpufreq_policy *policy, + unsigned int index) +{ + struct cpufreq_frequency_table *tbl = policy->freq_table + index; + void __iomem *edvd_reg = policy->driver_data; + u32 edvd_val = tbl->driver_data; + + writel(edvd_val, edvd_reg); + + return 0; +} + +static struct cpufreq_driver tegra186_cpufreq_driver = { + .name = "tegra186", + .flags = CPUFREQ_STICKY | CPUFREQ_HAVE_GOVERNOR_PER_POLICY, + .verify = cpufreq_generic_frequency_table_verify, + .target_index = tegra186_cpufreq_set_target, + .init = tegra186_cpufreq_init, + .attr = cpufreq_generic_attr, +}; + +static struct cpufreq_frequency_table *init_vhint_table( + struct platform_device *pdev, struct tegra_bpmp *bpmp, + unsigned int cluster_id) +{ + struct cpufreq_frequency_table *table; + struct mrq_cpu_vhint_request req; + struct tegra_bpmp_message msg; + struct cpu_vhint_data *data; + int err, i, j, num_rates = 0; + dma_addr_t phys; + void *virt; + + virt = dma_alloc_coherent(bpmp->dev, sizeof(*data), &phys, + GFP_KERNEL | GFP_DMA32); + if (!virt) + return ERR_PTR(-ENOMEM); + + data = (struct cpu_vhint_data *)virt; + + memset(&req, 0, sizeof(req)); + req.addr = phys; + req.cluster_id = cluster_id; + + memset(&msg, 0, sizeof(msg)); + msg.mrq = MRQ_CPU_VHINT; + msg.tx.data = &req; + msg.tx.size = sizeof(req); + + err = tegra_bpmp_transfer(bpmp, &msg); + if (err) { + table = ERR_PTR(err); + goto free; + } + + for (i = data->vfloor; i <= data->vceil; i++) { + u16 ndiv = data->ndiv[i]; + + if (ndiv < data->ndiv_min || ndiv > data->ndiv_max) + continue; + + /* Only store lowest voltage index for each rate */ + if (i > 0 && ndiv == data->ndiv[i - 1]) + continue; + + num_rates++; + } + + table = devm_kcalloc(&pdev->dev, num_rates + 1, sizeof(*table), + GFP_KERNEL); + if (!table) { + table = ERR_PTR(-ENOMEM); + goto free; + } + + for (i = data->vfloor, j = 0; i <= data->vceil; i++) { + struct cpufreq_frequency_table *point; + u16 ndiv = data->ndiv[i]; + u32 edvd_val = 0; + + if (ndiv < data->ndiv_min || ndiv > data->ndiv_max) + continue; + + /* Only store lowest voltage index for each rate */ + if (i > 0 && ndiv == data->ndiv[i - 1]) + continue; + + edvd_val |= i << EDVD_CORE_VOLT_FREQ_V_SHIFT; + edvd_val |= ndiv << EDVD_CORE_VOLT_FREQ_F_SHIFT; + + point = &table[j++]; + point->driver_data = edvd_val; + point->frequency = data->ref_clk_hz * ndiv / data->pdiv / + data->mdiv / 1000; + } + + table[j].frequency = CPUFREQ_TABLE_END; + +free: + dma_free_coherent(bpmp->dev, sizeof(*data), virt, phys); + + return table; +} + +static int tegra186_cpufreq_probe(struct platform_device *pdev) +{ + struct tegra186_cpufreq_data *data; + struct tegra_bpmp *bpmp; + struct resource *res; + unsigned int i = 0, err; + + data = devm_kzalloc(&pdev->dev, sizeof(*data), GFP_KERNEL); + if (!data) + return -ENOMEM; + + data->clusters = devm_kcalloc(&pdev->dev, ARRAY_SIZE(tegra186_clusters), + sizeof(*data->clusters), GFP_KERNEL); + if (!data->clusters) + return -ENOMEM; + + data->num_clusters = ARRAY_SIZE(tegra186_clusters); + + bpmp = tegra_bpmp_get(&pdev->dev); + if (IS_ERR(bpmp)) + return PTR_ERR(bpmp); + + res = platform_get_resource(pdev, IORESOURCE_MEM, 0); + data->regs = devm_ioremap_resource(&pdev->dev, res); + if (IS_ERR(data->regs)) { + err = PTR_ERR(data->regs); + goto put_bpmp; + } + + for (i = 0; i < data->num_clusters; i++) { + struct tegra186_cpufreq_cluster *cluster = &data->clusters[i]; + + cluster->info = &tegra186_clusters[i]; + cluster->table = init_vhint_table( + pdev, bpmp, cluster->info->bpmp_cluster_id); + if (IS_ERR(cluster->table)) { + err = PTR_ERR(cluster->table); + goto put_bpmp; + } + } + + tegra_bpmp_put(bpmp); + + tegra186_cpufreq_driver.driver_data = data; + + err = cpufreq_register_driver(&tegra186_cpufreq_driver); + if (err) + return err; + + return 0; + +put_bpmp: + tegra_bpmp_put(bpmp); + + return err; +} + +static int tegra186_cpufreq_remove(struct platform_device *pdev) +{ + cpufreq_unregister_driver(&tegra186_cpufreq_driver); + + return 0; +} + +static const struct of_device_id tegra186_cpufreq_of_match[] = { + { .compatible = "nvidia,tegra186-ccplex-cluster", }, + { } +}; +MODULE_DEVICE_TABLE(of, tegra186_cpufreq_of_match); + +static struct platform_driver tegra186_cpufreq_platform_driver = { + .driver = { + .name = "tegra186-cpufreq", + .of_match_table = tegra186_cpufreq_of_match, + }, + .probe = tegra186_cpufreq_probe, + .remove = tegra186_cpufreq_remove, +}; +module_platform_driver(tegra186_cpufreq_platform_driver); + +MODULE_AUTHOR("Mikko Perttunen "); +MODULE_DESCRIPTION("NVIDIA Tegra186 cpufreq driver"); +MODULE_LICENSE("GPL v2"); diff --git a/drivers/thermal/Kconfig b/drivers/thermal/Kconfig index 776b34396144..0a16cf4bed39 100644 --- a/drivers/thermal/Kconfig +++ b/drivers/thermal/Kconfig @@ -291,18 +291,6 @@ config ARMADA_THERMAL Enable this option if you want to have support for thermal management controller present in Armada 370 and Armada XP SoC. -config DB8500_CPUFREQ_COOLING - tristate "DB8500 cpufreq cooling" - depends on ARCH_U8500 || COMPILE_TEST - depends on HAS_IOMEM - depends on CPU_THERMAL - default y - help - Adds DB8500 cpufreq cooling devices, and these cooling devices can be - bound to thermal zone trip points. When a trip point reached, the - bound cpufreq cooling device turns active to set CPU frequency low to - cool down the CPU. - config INTEL_POWERCLAMP tristate "Intel PowerClamp idle injection driver" depends on THERMAL diff --git a/drivers/thermal/Makefile b/drivers/thermal/Makefile index 7adae2029355..c2372f10dae5 100644 --- a/drivers/thermal/Makefile +++ b/drivers/thermal/Makefile @@ -41,7 +41,6 @@ obj-$(CONFIG_TANGO_THERMAL) += tango_thermal.o obj-$(CONFIG_IMX_THERMAL) += imx_thermal.o obj-$(CONFIG_MAX77620_THERMAL) += max77620_thermal.o obj-$(CONFIG_QORIQ_THERMAL) += qoriq_thermal.o -obj-$(CONFIG_DB8500_CPUFREQ_COOLING) += db8500_cpufreq_cooling.o obj-$(CONFIG_INTEL_POWERCLAMP) += intel_powerclamp.o obj-$(CONFIG_X86_PKG_TEMP_THERMAL) += x86_pkg_temp_thermal.o obj-$(CONFIG_INTEL_SOC_DTS_IOSF_CORE) += intel_soc_dts_iosf.o diff --git a/drivers/thermal/db8500_cpufreq_cooling.c b/drivers/thermal/db8500_cpufreq_cooling.c deleted file mode 100644 index e58bd0b658b5..000000000000 --- a/drivers/thermal/db8500_cpufreq_cooling.c +++ /dev/null @@ -1,105 +0,0 @@ -/* - * db8500_cpufreq_cooling.c - DB8500 cpufreq works as cooling device. - * - * Copyright (C) 2012 ST-Ericsson - * Copyright (C) 2012 Linaro Ltd. - * - * Author: Hongbo Zhang - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - */ - -#include -#include -#include -#include -#include -#include - -static int db8500_cpufreq_cooling_probe(struct platform_device *pdev) -{ - struct thermal_cooling_device *cdev; - - cdev = cpufreq_cooling_register(cpu_present_mask); - if (IS_ERR(cdev)) { - int ret = PTR_ERR(cdev); - - if (ret != -EPROBE_DEFER) - dev_err(&pdev->dev, - "Failed to register cooling device %d\n", - ret); - - return ret; - } - - platform_set_drvdata(pdev, cdev); - - dev_info(&pdev->dev, "Cooling device registered: %s\n", cdev->type); - - return 0; -} - -static int db8500_cpufreq_cooling_remove(struct platform_device *pdev) -{ - struct thermal_cooling_device *cdev = platform_get_drvdata(pdev); - - cpufreq_cooling_unregister(cdev); - - return 0; -} - -static int db8500_cpufreq_cooling_suspend(struct platform_device *pdev, - pm_message_t state) -{ - return -ENOSYS; -} - -static int db8500_cpufreq_cooling_resume(struct platform_device *pdev) -{ - return -ENOSYS; -} - -#ifdef CONFIG_OF -static const struct of_device_id db8500_cpufreq_cooling_match[] = { - { .compatible = "stericsson,db8500-cpufreq-cooling" }, - {}, -}; -MODULE_DEVICE_TABLE(of, db8500_cpufreq_cooling_match); -#endif - -static struct platform_driver db8500_cpufreq_cooling_driver = { - .driver = { - .name = "db8500-cpufreq-cooling", - .of_match_table = of_match_ptr(db8500_cpufreq_cooling_match), - }, - .probe = db8500_cpufreq_cooling_probe, - .suspend = db8500_cpufreq_cooling_suspend, - .resume = db8500_cpufreq_cooling_resume, - .remove = db8500_cpufreq_cooling_remove, -}; - -static int __init db8500_cpufreq_cooling_init(void) -{ - return platform_driver_register(&db8500_cpufreq_cooling_driver); -} - -static void __exit db8500_cpufreq_cooling_exit(void) -{ - platform_driver_unregister(&db8500_cpufreq_cooling_driver); -} - -/* Should be later than db8500_cpufreq_register */ -late_initcall(db8500_cpufreq_cooling_init); -module_exit(db8500_cpufreq_cooling_exit); - -MODULE_AUTHOR("Hongbo Zhang "); -MODULE_DESCRIPTION("DB8500 cpufreq cooling driver"); -MODULE_LICENSE("GPL"); diff --git a/include/linux/cpufreq.h b/include/linux/cpufreq.h index 87165f06a307..a5ce0bbeadb5 100644 --- a/include/linux/cpufreq.h +++ b/include/linux/cpufreq.h @@ -120,6 +120,13 @@ struct cpufreq_policy { bool fast_switch_possible; bool fast_switch_enabled; + /* + * Preferred average time interval between consecutive invocations of + * the driver to set the frequency for this policy. To be set by the + * scaling driver (0, which is the default, means no preference). + */ + unsigned int transition_delay_us; + /* Cached frequency lookup from cpufreq_driver_resolve_freq. */ unsigned int cached_target_freq; int cached_resolved_idx; diff --git a/include/linux/tick.h b/include/linux/tick.h index a04fea19676f..fe01e68bf520 100644 --- a/include/linux/tick.h +++ b/include/linux/tick.h @@ -117,6 +117,7 @@ extern void tick_nohz_idle_enter(void); extern void tick_nohz_idle_exit(void); extern void tick_nohz_irq_exit(void); extern ktime_t tick_nohz_get_sleep_length(void); +extern unsigned long tick_nohz_get_idle_calls(void); extern u64 get_cpu_idle_time_us(int cpu, u64 *last_update_time); extern u64 get_cpu_iowait_time_us(int cpu, u64 *last_update_time); #else /* !CONFIG_NO_HZ_COMMON */ diff --git a/kernel/sched/cpufreq_schedutil.c b/kernel/sched/cpufreq_schedutil.c index 54c577578da6..76877a62b5fa 100644 --- a/kernel/sched/cpufreq_schedutil.c +++ b/kernel/sched/cpufreq_schedutil.c @@ -61,6 +61,11 @@ struct sugov_cpu { unsigned long util; unsigned long max; unsigned int flags; + + /* The field below is for single-CPU policies only. */ +#ifdef CONFIG_NO_HZ_COMMON + unsigned long saved_idle_calls; +#endif }; static DEFINE_PER_CPU(struct sugov_cpu, sugov_cpu); @@ -93,22 +98,23 @@ static void sugov_update_commit(struct sugov_policy *sg_policy, u64 time, { struct cpufreq_policy *policy = sg_policy->policy; + if (sg_policy->next_freq == next_freq) + return; + + if (sg_policy->next_freq > next_freq) + next_freq = (sg_policy->next_freq + next_freq) >> 1; + + sg_policy->next_freq = next_freq; sg_policy->last_freq_update_time = time; if (policy->fast_switch_enabled) { - if (sg_policy->next_freq == next_freq) { - trace_cpu_frequency(policy->cur, smp_processor_id()); - return; - } - sg_policy->next_freq = next_freq; next_freq = cpufreq_driver_fast_switch(policy, next_freq); if (next_freq == CPUFREQ_ENTRY_INVALID) return; policy->cur = next_freq; trace_cpu_frequency(next_freq, smp_processor_id()); - } else if (sg_policy->next_freq != next_freq) { - sg_policy->next_freq = next_freq; + } else { sg_policy->work_in_progress = true; irq_work_queue(&sg_policy->irq_work); } @@ -192,6 +198,19 @@ static void sugov_iowait_boost(struct sugov_cpu *sg_cpu, unsigned long *util, sg_cpu->iowait_boost >>= 1; } +#ifdef CONFIG_NO_HZ_COMMON +static bool sugov_cpu_is_busy(struct sugov_cpu *sg_cpu) +{ + unsigned long idle_calls = tick_nohz_get_idle_calls(); + bool ret = idle_calls == sg_cpu->saved_idle_calls; + + sg_cpu->saved_idle_calls = idle_calls; + return ret; +} +#else +static inline bool sugov_cpu_is_busy(struct sugov_cpu *sg_cpu) { return false; } +#endif /* CONFIG_NO_HZ_COMMON */ + static void sugov_update_single(struct update_util_data *hook, u64 time, unsigned int flags) { @@ -200,6 +219,7 @@ static void sugov_update_single(struct update_util_data *hook, u64 time, struct cpufreq_policy *policy = sg_policy->policy; unsigned long util, max; unsigned int next_f; + bool busy; sugov_set_iowait_boost(sg_cpu, time, flags); sg_cpu->last_update = time; @@ -207,40 +227,37 @@ static void sugov_update_single(struct update_util_data *hook, u64 time, if (!sugov_should_update_freq(sg_policy, time)) return; + busy = sugov_cpu_is_busy(sg_cpu); + if (flags & SCHED_CPUFREQ_RT_DL) { next_f = policy->cpuinfo.max_freq; } else { sugov_get_util(&util, &max); sugov_iowait_boost(sg_cpu, &util, &max); next_f = get_next_freq(sg_policy, util, max); + /* + * Do not reduce the frequency if the CPU has not been idle + * recently, as the reduction is likely to be premature then. + */ + if (busy && next_f < sg_policy->next_freq) + next_f = sg_policy->next_freq; } sugov_update_commit(sg_policy, time, next_f); } -static unsigned int sugov_next_freq_shared(struct sugov_cpu *sg_cpu, - unsigned long util, unsigned long max, - unsigned int flags) +static unsigned int sugov_next_freq_shared(struct sugov_cpu *sg_cpu) { struct sugov_policy *sg_policy = sg_cpu->sg_policy; struct cpufreq_policy *policy = sg_policy->policy; - unsigned int max_f = policy->cpuinfo.max_freq; u64 last_freq_update_time = sg_policy->last_freq_update_time; + unsigned long util = 0, max = 1; unsigned int j; - if (flags & SCHED_CPUFREQ_RT_DL) - return max_f; - - sugov_iowait_boost(sg_cpu, &util, &max); - for_each_cpu(j, policy->cpus) { - struct sugov_cpu *j_sg_cpu; + struct sugov_cpu *j_sg_cpu = &per_cpu(sugov_cpu, j); unsigned long j_util, j_max; s64 delta_ns; - if (j == smp_processor_id()) - continue; - - j_sg_cpu = &per_cpu(sugov_cpu, j); /* * If the CPU utilization was last updated before the previous * frequency update and the time elapsed between the last update @@ -254,7 +271,7 @@ static unsigned int sugov_next_freq_shared(struct sugov_cpu *sg_cpu, continue; } if (j_sg_cpu->flags & SCHED_CPUFREQ_RT_DL) - return max_f; + return policy->cpuinfo.max_freq; j_util = j_sg_cpu->util; j_max = j_sg_cpu->max; @@ -289,7 +306,11 @@ static void sugov_update_shared(struct update_util_data *hook, u64 time, sg_cpu->last_update = time; if (sugov_should_update_freq(sg_policy, time)) { - next_f = sugov_next_freq_shared(sg_cpu, util, max, flags); + if (flags & SCHED_CPUFREQ_RT_DL) + next_f = sg_policy->policy->cpuinfo.max_freq; + else + next_f = sugov_next_freq_shared(sg_cpu); + sugov_update_commit(sg_policy, time, next_f); } @@ -473,7 +494,6 @@ static int sugov_init(struct cpufreq_policy *policy) { struct sugov_policy *sg_policy; struct sugov_tunables *tunables; - unsigned int lat; int ret = 0; /* State should be equivalent to EXIT */ @@ -512,10 +532,16 @@ static int sugov_init(struct cpufreq_policy *policy) goto stop_kthread; } - tunables->rate_limit_us = LATENCY_MULTIPLIER; - lat = policy->cpuinfo.transition_latency / NSEC_PER_USEC; - if (lat) - tunables->rate_limit_us *= lat; + if (policy->transition_delay_us) { + tunables->rate_limit_us = policy->transition_delay_us; + } else { + unsigned int lat; + + tunables->rate_limit_us = LATENCY_MULTIPLIER; + lat = policy->cpuinfo.transition_latency / NSEC_PER_USEC; + if (lat) + tunables->rate_limit_us *= lat; + } policy->governor_data = sg_policy; sg_policy->tunables = tunables; diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c index 7fe53be86077..64c97fc130c4 100644 --- a/kernel/time/tick-sched.c +++ b/kernel/time/tick-sched.c @@ -993,6 +993,18 @@ ktime_t tick_nohz_get_sleep_length(void) return ts->sleep_length; } +/** + * tick_nohz_get_idle_calls - return the current idle calls counter value + * + * Called from the schedutil frequency scaling governor in scheduler context. + */ +unsigned long tick_nohz_get_idle_calls(void) +{ + struct tick_sched *ts = this_cpu_ptr(&tick_cpu_sched); + + return ts->idle_calls; +} + static void tick_nohz_account_idle_ticks(struct tick_sched *ts) { #ifndef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE