Diffstat (limited to 'intel-pstate-backport.patch')
-rw-r--r--	intel-pstate-backport.patch	1231
1 file changed, 1100 insertions(+), 131 deletions(-)
diff --git a/intel-pstate-backport.patch b/intel-pstate-backport.patch
index 9e466e174119..cf1427867261 100644
--- a/intel-pstate-backport.patch
+++ b/intel-pstate-backport.patch
@@ -1,148 +1,1117 @@
---- linux-4.8/drivers/cpufreq/intel_pstate.c.orig	2016-10-02 19:24:33.000000000 -0400
-+++ linux-4.8/drivers/cpufreq/intel_pstate.c	2016-10-09 19:32:01.073141319 -0400
-@@ -181,6 +181,8 @@
-  * @cpu: CPU number for this instance data
-  * @update_util: CPUFreq utility callback information
-  * @update_util_set: CPUFreq utility callback is set
-+ * @iowait_boost: iowait-related boost fraction
-+ * @last_update: Time of the last update.
-  * @pstate: Stores P state limits for this CPU
-  * @vid: Stores VID limits for this CPU
-  * @pid: Stores PID parameters for this CPU
-@@ -206,6 +208,7 @@
- 	struct vid_data vid;
- 	struct _pid pid;
- 
-+	u64 last_update;
- 	u64 last_sample_time;
- 	u64 prev_aperf;
- 	u64 prev_mperf;
-@@ -216,6 +219,7 @@
- 	struct acpi_processor_performance acpi_perf_data;
- 	bool valid_pss_table;
- #endif
-+	unsigned int iowait_boost;
- };
- 
- static struct cpudata **all_cpu_data;
-@@ -229,6 +233,7 @@
-  * @p_gain_pct: PID proportional gain
-  * @i_gain_pct: PID integral gain
-  * @d_gain_pct: PID derivative gain
-+ * @boost_iowait: Whether or not to use iowait boosting.
-  *
-  * Stores per CPU model static PID configuration data.
-  */
-@@ -240,6 +245,7 @@
- 	int p_gain_pct;
- 	int d_gain_pct;
- 	int i_gain_pct;
-+	bool boost_iowait;
- };
- 
- /**
-@@ -1029,7 +1035,7 @@
- 	},
- };
- 
--static struct cpu_defaults silvermont_params = {
-+static const struct cpu_defaults silvermont_params = {
- 	.pid_policy = {
- 		.sample_rate_ms = 10,
- 		.deadband = 0,
-@@ -1037,6 +1043,7 @@
- 		.p_gain_pct = 14,
- 		.d_gain_pct = 0,
- 		.i_gain_pct = 4,
-+		.boost_iowait = true,
- 	},
- 	.funcs = {
- 		.get_max = atom_get_max_pstate,
-@@ -1050,7 +1057,7 @@
- 	},
- };
- 
--static struct cpu_defaults airmont_params = {
-+static const struct cpu_defaults airmont_params = {
- 	.pid_policy = {
- 		.sample_rate_ms = 10,
- 		.deadband = 0,
-@@ -1058,6 +1065,7 @@
- 		.p_gain_pct = 14,
- 		.d_gain_pct = 0,
- 		.i_gain_pct = 4,
-+		.boost_iowait = true,
- 	},
- 	.funcs = {
- 		.get_max = atom_get_max_pstate,
-@@ -1071,7 +1079,7 @@
- 	},
- };
- 
--static struct cpu_defaults knl_params = {
-+static const struct cpu_defaults knl_params = {
- 	.pid_policy = {
- 		.sample_rate_ms = 10,
- 		.deadband = 0,
-@@ -1091,7 +1099,7 @@
- 	},
- };
- 
--static struct cpu_defaults bxt_params = {
-+static const struct cpu_defaults bxt_params = {
- 	.pid_policy = {
- 		.sample_rate_ms = 10,
- 		.deadband = 0,
-@@ -1099,6 +1107,7 @@
- 		.p_gain_pct = 14,
- 		.d_gain_pct = 0,
- 		.i_gain_pct = 4,
-+		.boost_iowait = true,
- 	},
- 	.funcs = {
- 		.get_max = core_get_max_pstate,
-@@ -1222,36 +1231,18 @@
- static inline int32_t get_target_pstate_use_cpu_load(struct cpudata *cpu)
- {
- 	struct sample *sample = &cpu->sample;
--	u64 cummulative_iowait, delta_iowait_us;
--	u64 delta_iowait_mperf;
--	u64 mperf, now;
--	int32_t cpu_load;
-+	int32_t busy_frac, boost;
- 
--	cummulative_iowait = get_cpu_iowait_time_us(cpu->cpu, &now);
-+	busy_frac = div_fp(sample->mperf, sample->tsc);
- 
--	/*
--	 * Convert iowait time into number of IO cycles spent at max_freq.
--	 * IO is considered as busy only for the cpu_load algorithm. For
--	 * performance this is not needed since we always try to reach the
--	 * maximum P-State, so we are already boosting the IOs.
--	 */
--	delta_iowait_us = cummulative_iowait - cpu->prev_cummulative_iowait;
--	delta_iowait_mperf = div64_u64(delta_iowait_us * cpu->pstate.scaling *
--		cpu->pstate.max_pstate, MSEC_PER_SEC);
-+	boost = cpu->iowait_boost;
-+	cpu->iowait_boost >>= 1;
- 
--	mperf = cpu->sample.mperf + delta_iowait_mperf;
--	cpu->prev_cummulative_iowait = cummulative_iowait;
-+	if (busy_frac < boost)
-+		busy_frac = boost;
- 
--	/*
--	 * The load can be estimated as the ratio of the mperf counter
--	 * running at a constant frequency during active periods
--	 * (C0) and the time stamp counter running at the same frequency
--	 * also during C-states.
--	 */
--	cpu_load = div64_u64(int_tofp(100) * mperf, sample->tsc);
--	cpu->sample.busy_scaled = cpu_load;
--
--	return get_avg_pstate(cpu) - pid_calc(&cpu->pid, cpu_load);
-+	sample->busy_scaled = busy_frac * 100;
-+	return get_avg_pstate(cpu) - pid_calc(&cpu->pid, sample->busy_scaled);
- }
- 
- static inline int32_t get_target_pstate_use_performance(struct cpudata *cpu)
+--- drivers/cpufreq/intel_pstate.c.orig	2017-02-19 17:34:00.000000000 -0500
++++ drivers/cpufreq/intel_pstate.c	2017-03-23 23:47:19.095511820 -0400
+@@ -19,7 +19,7 @@
+ #include <linux/hrtimer.h>
+ #include <linux/tick.h>
+ #include <linux/slab.h>
+-#include <linux/sched.h>
++#include <linux/sched/cpufreq.h>
+ #include <linux/list.h>
+ #include <linux/cpu.h>
+ #include <linux/cpufreq.h>
+@@ -39,11 +39,6 @@
+ 
+ #define INTEL_CPUFREQ_TRANSITION_LATENCY	20000
+ 
+-#define ATOM_RATIOS		0x66a
+-#define ATOM_VIDS		0x66b
+-#define ATOM_TURBO_RATIOS	0x66c
+-#define ATOM_TURBO_VIDS	0x66d
+-
+ #ifdef CONFIG_ACPI
+ #include <acpi/processor.h>
+ #include <acpi/cppc_acpi.h>
+@@ -89,6 +84,11 @@
+ 	return div64_u64(x << EXT_FRAC_BITS, y);
+ }
+ 
++static inline int32_t percent_ext_fp(int percent)
++{
++	return div_ext_fp(percent, 100);
++}
++
+ /**
+  * struct sample - Store performance sample
+  * @core_avg_perf:	Ratio of APERF/MPERF which is the actual average
+@@ -358,42 +358,24 @@
+ static int hwp_active __read_mostly;
+ static bool per_cpu_limits __read_mostly;
+ 
++static bool driver_registered __read_mostly;
++
+ #ifdef CONFIG_ACPI
+ static bool acpi_ppc;
+ #endif
+ 
+-static struct perf_limits performance_limits = {
+-	.no_turbo = 0,
+-	.turbo_disabled = 0,
+-	.max_perf_pct = 100,
+-	.max_perf = int_ext_tofp(1),
+-	.min_perf_pct = 100,
+-	.min_perf = int_ext_tofp(1),
+-	.max_policy_pct = 100,
+-	.max_sysfs_pct = 100,
+-	.min_policy_pct = 0,
+-	.min_sysfs_pct = 0,
+-};
+-
+-static struct perf_limits powersave_limits = {
+-	.no_turbo = 0,
+-	.turbo_disabled = 0,
+-	.max_perf_pct = 100,
+-	.max_perf = int_ext_tofp(1),
+-	.min_perf_pct = 0,
+-	.min_perf = 0,
+-	.max_policy_pct = 100,
+-	.max_sysfs_pct = 100,
+-	.min_policy_pct = 0,
+-	.min_sysfs_pct = 0,
+-};
++static struct perf_limits global;
+ 
+-#ifdef CONFIG_CPU_FREQ_DEFAULT_GOV_PERFORMANCE
+-static struct perf_limits *limits = &performance_limits;
+-#else
+-static struct perf_limits *limits = &powersave_limits;
+-#endif
++static void intel_pstate_init_limits(struct perf_limits *limits)
++{
++	memset(limits, 0, sizeof(*limits));
++	limits->max_perf_pct = 100;
++	limits->max_perf = int_ext_tofp(1);
++	limits->max_policy_pct = 100;
++	limits->max_sysfs_pct = 100;
++}
+ 
++static DEFINE_MUTEX(intel_pstate_driver_lock);
+ static DEFINE_MUTEX(intel_pstate_limits_lock);
+ 
+ #ifdef CONFIG_ACPI
+@@ -515,7 +497,7 @@
+ 	 * correct max turbo frequency based on the turbo state.
+ 	 * Also need to convert to MHz as _PSS freq is in MHz.
+ 	 */
+-	if (!limits->turbo_disabled)
++	if (!global.turbo_disabled)
+ 		cpu->acpi_perf_data.states[0].core_frequency =
+ 					policy->cpuinfo.max_freq / 1000;
+ 	cpu->valid_pss_table = true;
+@@ -538,7 +520,6 @@
+ 
+ 	acpi_processor_unregister_performance(policy->cpu);
+ }
+-
+ #else
+ static inline void intel_pstate_init_acpi_perf_limits(struct cpufreq_policy *policy)
+ {
+@@ -635,7 +616,7 @@
+ 
+ 	cpu = all_cpu_data[0];
+ 	rdmsrl(MSR_IA32_MISC_ENABLE, misc_en);
+-	limits->turbo_disabled =
++	global.turbo_disabled =
+ 		(misc_en & MSR_IA32_MISC_ENABLE_TURBO_DISABLE ||
+ 			cpu->pstate.max_pstate == cpu->pstate.turbo_pstate);
+ }
+@@ -859,12 +840,11 @@
+ 
+ static void intel_pstate_hwp_set(struct cpufreq_policy *policy)
+ {
+-	int min, hw_min, max, hw_max, cpu, range, adj_range;
+-	struct perf_limits *perf_limits = limits;
++	int min, hw_min, max, hw_max, cpu;
++	struct perf_limits *perf_limits = &global;
+ 	u64 value, cap;
+ 
+ 	for_each_cpu(cpu, policy->cpus) {
+-		int max_perf_pct, min_perf_pct;
+ 		struct cpudata *cpu_data = all_cpu_data[cpu];
+ 		s16 epp;
+ 
+@@ -873,26 +853,22 @@
+ 
+ 		rdmsrl_on_cpu(cpu, MSR_HWP_CAPABILITIES, &cap);
+ 		hw_min = HWP_LOWEST_PERF(cap);
+-		hw_max = HWP_HIGHEST_PERF(cap);
+-		range = hw_max - hw_min;
++		if (global.no_turbo)
++			hw_max = HWP_GUARANTEED_PERF(cap);
++		else
++			hw_max = HWP_HIGHEST_PERF(cap);
+ 
+-		max_perf_pct = perf_limits->max_perf_pct;
+-		min_perf_pct = perf_limits->min_perf_pct;
++		max = fp_ext_toint(hw_max * perf_limits->max_perf);
++		if (cpu_data->policy == CPUFREQ_POLICY_PERFORMANCE)
++			min = max;
++		else
++			min = fp_ext_toint(hw_max * perf_limits->min_perf);
+ 
+ 		rdmsrl_on_cpu(cpu, MSR_HWP_REQUEST, &value);
+-		adj_range = min_perf_pct * range / 100;
+-		min = hw_min + adj_range;
++
+ 		value &= ~HWP_MIN_PERF(~0L);
+ 		value |= HWP_MIN_PERF(min);
+ 
+-		adj_range = max_perf_pct * range / 100;
+-		max = hw_min + adj_range;
+-		if (limits->no_turbo) {
+-			hw_max = HWP_GUARANTEED_PERF(cap);
+-			if (hw_max < max)
+-				max = hw_max;
+-		}
+-
+ 		value &= ~HWP_MAX_PERF(~0L);
+ 		value |= HWP_MAX_PERF(max);
+ 
+@@ -996,6 +972,7 @@
+ static int pid_param_set(void *data, u64 val)
+ {
+ 	*(u32 *)data = val;
++	pid_params.sample_rate_ns = pid_params.sample_rate_ms * NSEC_PER_MSEC;
+ 	intel_pstate_reset_all_pid();
+ 	return 0;
+ }
+@@ -1007,35 +984,57 @@
+ }
+ DEFINE_SIMPLE_ATTRIBUTE(fops_pid_param, pid_param_get, pid_param_set, "%llu\n");
+ 
++static struct dentry *debugfs_parent;
++
+ struct pid_param {
+ 	char *name;
+ 	void *value;
++	struct dentry *dentry;
+ };
+ 
+ static struct pid_param pid_files[] = {
+-	{"sample_rate_ms", &pid_params.sample_rate_ms},
+-	{"d_gain_pct", &pid_params.d_gain_pct},
+-	{"i_gain_pct", &pid_params.i_gain_pct},
+-	{"deadband", &pid_params.deadband},
+-	{"setpoint", &pid_params.setpoint},
+-	{"p_gain_pct", &pid_params.p_gain_pct},
+-	{NULL, NULL}
++	{"sample_rate_ms", &pid_params.sample_rate_ms, },
++	{"d_gain_pct", &pid_params.d_gain_pct, },
++	{"i_gain_pct", &pid_params.i_gain_pct, },
++	{"deadband", &pid_params.deadband, },
++	{"setpoint", &pid_params.setpoint, },
++	{"p_gain_pct", &pid_params.p_gain_pct, },
++	{NULL, NULL, }
+ };
+ 
+-static void __init intel_pstate_debug_expose_params(void)
++static void intel_pstate_debug_expose_params(void)
+ {
+-	struct dentry *debugfs_parent;
+-	int i = 0;
++	int i;
+ 
+ 	debugfs_parent = debugfs_create_dir("pstate_snb", NULL);
+ 	if (IS_ERR_OR_NULL(debugfs_parent))
+ 		return;
+-	while (pid_files[i].name) {
+-		debugfs_create_file(pid_files[i].name, 0660,
+-				    debugfs_parent, pid_files[i].value,
+-				    &fops_pid_param);
+-		i++;
++
++	for (i = 0; pid_files[i].name; i++) {
++		struct dentry *dentry;
++
++		dentry = debugfs_create_file(pid_files[i].name, 0660,
++					     debugfs_parent, pid_files[i].value,
++					     &fops_pid_param);
++		if (!IS_ERR(dentry))
++			pid_files[i].dentry = dentry;
+ 	}
++}
++
++static void intel_pstate_debug_hide_params(void)
++{
++	int i;
++
++	if (IS_ERR_OR_NULL(debugfs_parent))
++		return;
++
++	for (i = 0; pid_files[i].name; i++) {
++		debugfs_remove(pid_files[i].dentry);
++		pid_files[i].dentry = NULL;
++	}
++
++	debugfs_remove(debugfs_parent);
++	debugfs_parent = NULL;
+ }
+ 
+ /************************** debugfs end ************************/
+@@ -1045,9 +1044,37 @@
+ static ssize_t show_##file_name					\
+ 	(struct kobject *kobj, struct attribute *attr, char *buf)	\
+ {									\
+-	return sprintf(buf, "%u\n", limits->object);		\
++	return sprintf(buf, "%u\n", global.object);		\
+ }
+ 
++static ssize_t intel_pstate_show_status(char *buf);
++static int intel_pstate_update_status(const char *buf, size_t size);
++
++static ssize_t show_status(struct kobject *kobj,
++			   struct attribute *attr, char *buf)
++{
++	ssize_t ret;
++
++	mutex_lock(&intel_pstate_driver_lock);
++	ret = intel_pstate_show_status(buf);
++	mutex_unlock(&intel_pstate_driver_lock);
++
++	return ret;
++}
++
++static ssize_t store_status(struct kobject *a, struct attribute *b,
++			    const char *buf, size_t count)
++{
++	char *p = memchr(buf, '\n', count);
++	int ret;
++
++	mutex_lock(&intel_pstate_driver_lock);
++	ret = intel_pstate_update_status(buf, p ? p - buf : count);
++	mutex_unlock(&intel_pstate_driver_lock);
++
++	return ret < 0 ? ret : count;
++}
++
+ static ssize_t show_turbo_pct(struct kobject *kobj,
+ 				struct attribute *attr, char *buf)
+ {
+@@ -1055,12 +1082,22 @@
+ 	int total, no_turbo, turbo_pct;
+ 	uint32_t turbo_fp;
+ 
++	mutex_lock(&intel_pstate_driver_lock);
++
++	if (!driver_registered) {
++		mutex_unlock(&intel_pstate_driver_lock);
++		return -EAGAIN;
++	}
++
+ 	cpu = all_cpu_data[0];
+ 
+ 	total = cpu->pstate.turbo_pstate - cpu->pstate.min_pstate + 1;
+ 	no_turbo = cpu->pstate.max_pstate - cpu->pstate.min_pstate + 1;
+ 	turbo_fp = div_fp(no_turbo, total);
+ 	turbo_pct = 100 - fp_toint(mul_fp(turbo_fp, int_tofp(100)));
++
++	mutex_unlock(&intel_pstate_driver_lock);
++
+ 	return sprintf(buf, "%u\n", turbo_pct);
+ }
+ 
+@@ -1070,8 +1107,18 @@
+ 	struct cpudata *cpu;
+ 	int total;
+ 
++	mutex_lock(&intel_pstate_driver_lock);
++
++	if (!driver_registered) {
++		mutex_unlock(&intel_pstate_driver_lock);
++		return -EAGAIN;
++	}
++
+ 	cpu = all_cpu_data[0];
+ 	total = cpu->pstate.turbo_pstate - cpu->pstate.min_pstate + 1;
++
++	mutex_unlock(&intel_pstate_driver_lock);
++
+ 	return sprintf(buf, "%u\n", total);
+ }
+ 
+@@ -1080,11 +1127,20 @@
+ {
+ 	ssize_t ret;
+ 
++	mutex_lock(&intel_pstate_driver_lock);
++
++	if (!driver_registered) {
++		mutex_unlock(&intel_pstate_driver_lock);
++		return -EAGAIN;
++	}
++
+ 	update_turbo_state();
+-	if (limits->turbo_disabled)
+-		ret = sprintf(buf, "%u\n", limits->turbo_disabled);
++	if (global.turbo_disabled)
++		ret = sprintf(buf, "%u\n", global.turbo_disabled);
+ 	else
+-		ret = sprintf(buf, "%u\n", limits->no_turbo);
++		ret = sprintf(buf, "%u\n", global.no_turbo);
++
++	mutex_unlock(&intel_pstate_driver_lock);
+ 
+ 	return ret;
+ }
+@@ -1099,21 +1155,31 @@
+ 	if (ret != 1)
+ 		return -EINVAL;
+ 
++	mutex_lock(&intel_pstate_driver_lock);
++
++	if (!driver_registered) {
++		mutex_unlock(&intel_pstate_driver_lock);
++		return -EAGAIN;
++	}
++
+ 	mutex_lock(&intel_pstate_limits_lock);
+ 
+ 	update_turbo_state();
+-	if (limits->turbo_disabled) {
++	if (global.turbo_disabled) {
+ 		pr_warn("Turbo disabled by BIOS or unavailable on processor\n");
+ 		mutex_unlock(&intel_pstate_limits_lock);
++		mutex_unlock(&intel_pstate_driver_lock);
+ 		return -EPERM;
+ 	}
+ 
+-	limits->no_turbo = clamp_t(int, input, 0, 1);
++	global.no_turbo = clamp_t(int, input, 0, 1);
+ 
+ 	mutex_unlock(&intel_pstate_limits_lock);
+ 
+ 	intel_pstate_update_policies();
+ 
++	mutex_unlock(&intel_pstate_driver_lock);
++
+ 	return count;
+ }
+ 
+@@ -1127,21 +1193,27 @@
+ 	if (ret != 1)
+ 		return -EINVAL;
+ 
++	mutex_lock(&intel_pstate_driver_lock);
++
++	if (!driver_registered) {
++		mutex_unlock(&intel_pstate_driver_lock);
++		return -EAGAIN;
++	}
++
+ 	mutex_lock(&intel_pstate_limits_lock);
+ 
+-	limits->max_sysfs_pct = clamp_t(int, input, 0 , 100);
+-	limits->max_perf_pct = min(limits->max_policy_pct,
+-				   limits->max_sysfs_pct);
+-	limits->max_perf_pct = max(limits->min_policy_pct,
+-				   limits->max_perf_pct);
+-	limits->max_perf_pct = max(limits->min_perf_pct,
+-				   limits->max_perf_pct);
+-	limits->max_perf = div_ext_fp(limits->max_perf_pct, 100);
++	global.max_sysfs_pct = clamp_t(int, input, 0 , 100);
++	global.max_perf_pct = min(global.max_policy_pct, global.max_sysfs_pct);
++	global.max_perf_pct = max(global.min_policy_pct, global.max_perf_pct);
++	global.max_perf_pct = max(global.min_perf_pct, global.max_perf_pct);
++	global.max_perf = percent_ext_fp(global.max_perf_pct);
+ 
+ 	mutex_unlock(&intel_pstate_limits_lock);
+ 
+ 	intel_pstate_update_policies();
+ 
++	mutex_unlock(&intel_pstate_driver_lock);
++
+ 	return count;
+ }
+ 
+@@ -1155,27 +1227,34 @@
+ 	if (ret != 1)
+ 		return -EINVAL;
+ 
++	mutex_lock(&intel_pstate_driver_lock);
++
++	if (!driver_registered) {
++		mutex_unlock(&intel_pstate_driver_lock);
++		return -EAGAIN;
++	}
++
+ 	mutex_lock(&intel_pstate_limits_lock);
+ 
+-	limits->min_sysfs_pct = clamp_t(int, input, 0 , 100);
+-	limits->min_perf_pct = max(limits->min_policy_pct,
+-				   limits->min_sysfs_pct);
+-	limits->min_perf_pct = min(limits->max_policy_pct,
+-				   limits->min_perf_pct);
+-	limits->min_perf_pct = min(limits->max_perf_pct,
+-				   limits->min_perf_pct);
+-	limits->min_perf = div_ext_fp(limits->min_perf_pct, 100);
++	global.min_sysfs_pct = clamp_t(int, input, 0 , 100);
++	global.min_perf_pct = max(global.min_policy_pct, global.min_sysfs_pct);
++	global.min_perf_pct = min(global.max_policy_pct, global.min_perf_pct);
++	global.min_perf_pct = min(global.max_perf_pct, global.min_perf_pct);
++	global.min_perf = percent_ext_fp(global.min_perf_pct);
+ 
+ 	mutex_unlock(&intel_pstate_limits_lock);
+ 
+ 	intel_pstate_update_policies();
+ 
++	mutex_unlock(&intel_pstate_driver_lock);
++
+ 	return count;
+ }
+ 
+ show_one(max_perf_pct, max_perf_pct);
+ show_one(min_perf_pct, min_perf_pct);
+ 
++define_one_global_rw(status);
+ define_one_global_rw(no_turbo);
+ define_one_global_rw(max_perf_pct);
+ define_one_global_rw(min_perf_pct);
+@@ -1183,6 +1262,7 @@
+ define_one_global_ro(num_pstates);
+ 
+ static struct attribute *intel_pstate_attributes[] = {
++	&status.attr,
+ 	&no_turbo.attr,
+ 	&turbo_pct.attr,
+ 	&num_pstates.attr,
+@@ -1258,7 +1338,7 @@
+ {
+ 	u64 value;
+ 
+-	rdmsrl(ATOM_RATIOS, value);
++	rdmsrl(MSR_ATOM_CORE_RATIOS, value);
+ 	return (value >> 8) & 0x7F;
+ }
+ 
+@@ -1266,7 +1346,7 @@
+ {
+ 	u64 value;
+ 
+-	rdmsrl(ATOM_RATIOS, value);
++	rdmsrl(MSR_ATOM_CORE_RATIOS, value);
+ 	return (value >> 16) & 0x7F;
+ }
+ 
+@@ -1274,7 +1354,7 @@
+ {
+ 	u64 value;
+ 
+-	rdmsrl(ATOM_TURBO_RATIOS, value);
++	rdmsrl(MSR_ATOM_CORE_TURBO_RATIOS, value);
+ 	return value & 0x7F;
+ }
+ 
+@@ -1285,7 +1365,7 @@
+ 	u32 vid;
+ 
+ 	val = (u64)pstate << 8;
+-	if (limits->no_turbo && !limits->turbo_disabled)
++	if (global.no_turbo && !global.turbo_disabled)
+ 		val |= (u64)1 << 32;
+ 
+ 	vid_fp = cpudata->vid.min + mul_fp(
+@@ -1336,7 +1416,7 @@
+ {
+ 	u64 value;
+ 
+-	rdmsrl(ATOM_VIDS, value);
++	rdmsrl(MSR_ATOM_CORE_VIDS, value);
+ 	cpudata->vid.min = int_tofp((value >> 8) & 0x7f);
+ 	cpudata->vid.max = int_tofp((value >> 16) & 0x7f);
+ 	cpudata->vid.ratio = div_fp(
+@@ -1344,7 +1424,7 @@
+ 		int_tofp(cpudata->pstate.max_pstate -
+ 			 cpudata->pstate.min_pstate));
+ 
+-	rdmsrl(ATOM_TURBO_VIDS, value);
++	rdmsrl(MSR_ATOM_CORE_TURBO_VIDS, value);
+ 	cpudata->vid.turbo = value & 0x7f;
+ }
+ 
+@@ -1364,48 +1444,71 @@
+ 	return (value >> 8) & 0xFF;
+ }
+ 
++static int core_get_tdp_ratio(u64 plat_info)
++{
++	/* Check how many TDP levels present */
++	if (plat_info & 0x600000000) {
++		u64 tdp_ctrl;
++		u64 tdp_ratio;
++		int tdp_msr;
++		int err;
++
++		/* Get the TDP level (0, 1, 2) to get ratios */
++		err = rdmsrl_safe(MSR_CONFIG_TDP_CONTROL, &tdp_ctrl);
++		if (err)
++			return err;
++
++		/* TDP MSR are continuous starting at 0x648 */
++		tdp_msr = MSR_CONFIG_TDP_NOMINAL + (tdp_ctrl & 0x03);
++		err = rdmsrl_safe(tdp_msr, &tdp_ratio);
++		if (err)
++			return err;
++
++		/* For level 1 and 2, bits[23:16] contain the ratio */
++		if (tdp_ctrl & 0x03)
++			tdp_ratio >>= 16;
++
++		tdp_ratio &= 0xff; /* ratios are only 8 bits long */
++		pr_debug("tdp_ratio %x\n", (int)tdp_ratio);
++
++		return (int)tdp_ratio;
++	}
++
++	return -ENXIO;
++}
++
+ static int core_get_max_pstate(void)
+ {
+ 	u64 tar;
+ 	u64 plat_info;
+ 	int max_pstate;
++	int tdp_ratio;
+ 	int err;
+ 
+ 	rdmsrl(MSR_PLATFORM_INFO, plat_info);
+ 	max_pstate = (plat_info >> 8) & 0xFF;
+ 
++	tdp_ratio = core_get_tdp_ratio(plat_info);
++	if (tdp_ratio <= 0)
++		return max_pstate;
++
++	if (hwp_active) {
++		/* Turbo activation ratio is not used on HWP platforms */
++		return tdp_ratio;
++	}
++
+ 	err = rdmsrl_safe(MSR_TURBO_ACTIVATION_RATIO, &tar);
+ 	if (!err) {
++		int tar_levels;
++
+ 		/* Do some sanity checking for safety */
+-		if (plat_info & 0x600000000) {
+-			u64 tdp_ctrl;
+-			u64 tdp_ratio;
+-			int tdp_msr;
+-
+-			err = rdmsrl_safe(MSR_CONFIG_TDP_CONTROL, &tdp_ctrl);
+-			if (err)
+-				goto skip_tar;
+-
+-			tdp_msr = MSR_CONFIG_TDP_NOMINAL + (tdp_ctrl & 0x3);
+-			err = rdmsrl_safe(tdp_msr, &tdp_ratio);
+-			if (err)
+-				goto skip_tar;
+-
+-			/* For level 1 and 2, bits[23:16] contain the ratio */
+-			if (tdp_ctrl)
+-				tdp_ratio >>= 16;
+-
+-			tdp_ratio &= 0xff; /* ratios are only 8 bits long */
+-			if (tdp_ratio - 1 == tar) {
+-				max_pstate = tar;
+-				pr_debug("max_pstate=TAC %x\n", max_pstate);
+-			} else {
+-				goto skip_tar;
+-			}
++		tar_levels = tar & 0xff;
++		if (tdp_ratio - 1 == tar_levels) {
++			max_pstate = tar_levels;
++			pr_debug("max_pstate=TAC %x\n", max_pstate);
+ 		}
+ 	}
+ 
+-skip_tar:
+ 	return max_pstate;
+ }
+ 
+@@ -1432,7 +1535,7 @@
+ 	u64 val;
+ 
+ 	val = (u64)pstate << 8;
+-	if (limits->no_turbo && !limits->turbo_disabled)
++	if (global.no_turbo && !global.turbo_disabled)
+ 		val |= (u64)1 << 32;
+ 
+ 	return val;
+@@ -1558,9 +1661,9 @@
+ 	int max_perf = cpu->pstate.turbo_pstate;
+ 	int max_perf_adj;
+ 	int min_perf;
+-	struct perf_limits *perf_limits = limits;
++	struct perf_limits *perf_limits = &global;
+ 
+-	if (limits->no_turbo || limits->turbo_disabled)
++	if (global.no_turbo || global.turbo_disabled)
+ 		max_perf = cpu->pstate.max_pstate;
+ 
+ 	if (per_cpu_limits)
+@@ -1695,7 +1798,7 @@
+ 
+ 	sample->busy_scaled = busy_frac * 100;
+ 
+-	target = limits->no_turbo || limits->turbo_disabled ?
++	target = global.no_turbo || global.turbo_disabled ?
+ 			cpu->pstate.max_pstate : cpu->pstate.turbo_pstate;
+ 	target += target >> 2;
+ 	target = mul_fp(target, busy_frac);
+@@ -1759,13 +1862,11 @@
+ 
+ 	intel_pstate_get_min_max(cpu, &min_perf, &max_perf);
+ 	pstate = clamp_t(int, pstate, min_perf, max_perf);
+-	trace_cpu_frequency(pstate * cpu->pstate.scaling, cpu->cpu);
+ 	return pstate;
+ }
+ 
+ static void intel_pstate_update_pstate(struct cpudata *cpu, int pstate)
+ {
+-	pstate = intel_pstate_prepare_request(cpu, pstate);
+ 	if (pstate == cpu->pstate.current_pstate)
+ 		return;
+ 
+@@ -1785,6 +1886,8 @@
+ 
+ 	update_turbo_state();
+ 
++	target_pstate = intel_pstate_prepare_request(cpu, target_pstate);
++	trace_cpu_frequency(target_pstate * cpu->pstate.scaling, cpu->cpu);
+ 	intel_pstate_update_pstate(cpu, target_pstate);
+ 
+ 	sample = &cpu->sample;
+@@ -1952,53 +2055,37 @@
+ 	synchronize_sched();
+ }
+ 
+-static void intel_pstate_set_performance_limits(struct perf_limits *limits)
+-{
+-	limits->no_turbo = 0;
+-	limits->turbo_disabled = 0;
+-	limits->max_perf_pct = 100;
+-	limits->max_perf = int_ext_tofp(1);
+-	limits->min_perf_pct = 100;
+-	limits->min_perf = int_ext_tofp(1);
+-	limits->max_policy_pct = 100;
+-	limits->max_sysfs_pct = 100;
+-	limits->min_policy_pct = 0;
+-	limits->min_sysfs_pct = 0;
+-}
+-
+ static void intel_pstate_update_perf_limits(struct cpufreq_policy *policy,
+ 					    struct perf_limits *limits)
+ {
++	int32_t max_policy_perf, min_policy_perf;
+ 
+-	limits->max_policy_pct = DIV_ROUND_UP(policy->max * 100,
+-					      policy->cpuinfo.max_freq);
+-	limits->max_policy_pct = clamp_t(int, limits->max_policy_pct, 0, 100);
++	max_policy_perf = div_ext_fp(policy->max, policy->cpuinfo.max_freq);
++	max_policy_perf = clamp_t(int32_t, max_policy_perf, 0, int_ext_tofp(1));
+ 	if (policy->max == policy->min) {
+-		limits->min_policy_pct = limits->max_policy_pct;
++		min_policy_perf = max_policy_perf;
+ 	} else {
+-		limits->min_policy_pct = DIV_ROUND_UP(policy->min * 100,
+-						      policy->cpuinfo.max_freq);
+-		limits->min_policy_pct = clamp_t(int, limits->min_policy_pct,
+-						 0, 100);
+-	}
+-
+-	/* Normalize user input to [min_policy_pct, max_policy_pct] */
+-	limits->min_perf_pct = max(limits->min_policy_pct,
+-				   limits->min_sysfs_pct);
+-	limits->min_perf_pct = min(limits->max_policy_pct,
+-				   limits->min_perf_pct);
+-	limits->max_perf_pct = min(limits->max_policy_pct,
+-				   limits->max_sysfs_pct);
+-	limits->max_perf_pct = max(limits->min_policy_pct,
+-				   limits->max_perf_pct);
++		min_policy_perf = div_ext_fp(policy->min,
++					     policy->cpuinfo.max_freq);
++		min_policy_perf = clamp_t(int32_t, min_policy_perf,
++					  0, max_policy_perf);
++	}
++
++	/* Normalize user input to [min_perf, max_perf] */
++	limits->min_perf = max(min_policy_perf,
++			       percent_ext_fp(limits->min_sysfs_pct));
++	limits->min_perf = min(limits->min_perf, max_policy_perf);
++	limits->max_perf = min(max_policy_perf,
++			       percent_ext_fp(limits->max_sysfs_pct));
++	limits->max_perf = max(min_policy_perf, limits->max_perf);
+ 
+-	/* Make sure min_perf_pct <= max_perf_pct */
+-	limits->min_perf_pct = min(limits->max_perf_pct, limits->min_perf_pct);
++	/* Make sure min_perf <= max_perf */
++	limits->min_perf = min(limits->min_perf, limits->max_perf);
+ 
+-	limits->min_perf = div_ext_fp(limits->min_perf_pct, 100);
+-	limits->max_perf = div_ext_fp(limits->max_perf_pct, 100);
+ 	limits->max_perf = round_up(limits->max_perf, EXT_FRAC_BITS);
+ 	limits->min_perf = round_up(limits->min_perf, EXT_FRAC_BITS);
++	limits->max_perf_pct = fp_ext_toint(limits->max_perf * 100);
++	limits->min_perf_pct = fp_ext_toint(limits->min_perf * 100);
+ 
+ 	pr_debug("cpu:%d max_perf_pct:%d min_perf_pct:%d\n", policy->cpu,
+ 		 limits->max_perf_pct, limits->min_perf_pct);
+@@ -2007,7 +2094,7 @@
+ static int intel_pstate_set_policy(struct cpufreq_policy *policy)
+ {
+ 	struct cpudata *cpu;
+-	struct perf_limits *perf_limits = NULL;
++	struct perf_limits *perf_limits = &global;
+ 
+ 	if (!policy->cpuinfo.max_freq)
+ 		return -ENODEV;
+@@ -2030,28 +2117,8 @@
+ 
+ 	mutex_lock(&intel_pstate_limits_lock);
+ 
+-	if (policy->policy == CPUFREQ_POLICY_PERFORMANCE) {
+-		if (!perf_limits) {
+-			limits = &performance_limits;
+-			perf_limits = limits;
+-		}
+-		if (policy->max >= policy->cpuinfo.max_freq &&
+-		    !limits->no_turbo) {
+-			pr_debug("set performance\n");
+-			intel_pstate_set_performance_limits(perf_limits);
+-			goto out;
+-		}
+-	} else {
+-		pr_debug("set powersave\n");
+-		if (!perf_limits) {
+-			limits = &powersave_limits;
+-			perf_limits = limits;
+-		}
+-
+-	}
+-
+ 	intel_pstate_update_perf_limits(policy, perf_limits);
+- out:
++
+ 	if (cpu->policy == CPUFREQ_POLICY_PERFORMANCE) {
+ 		/*
+ 		 * NOHZ_FULL CPUs need this as the governor callback may not
+@@ -2072,6 +2139,13 @@
+ 
+ static int intel_pstate_verify_policy(struct cpufreq_policy *policy)
+ {
++	struct cpudata *cpu = all_cpu_data[policy->cpu];
++
++	update_turbo_state();
++	policy->cpuinfo.max_freq = global.turbo_disabled || global.no_turbo ?
++					cpu->pstate.max_freq :
++					cpu->pstate.turbo_freq;
++
+ 	cpufreq_verify_within_cpu_limits(policy);
+ 
+ 	if (policy->policy != CPUFREQ_POLICY_POWERSAVE &&
+@@ -2083,9 +2157,9 @@
+ 		unsigned int max_freq, min_freq;
+ 
+ 		max_freq = policy->cpuinfo.max_freq *
+-					limits->max_sysfs_pct / 100;
++					global.max_sysfs_pct / 100;
+ 		min_freq = policy->cpuinfo.max_freq *
+-					limits->min_sysfs_pct / 100;
++					global.min_sysfs_pct / 100;
+ 		cpufreq_verify_within_limits(policy, min_freq, max_freq);
+ 	}
+ 
+@@ -2128,13 +2202,8 @@
+ 
+ 	cpu = all_cpu_data[policy->cpu];
+ 
+-	/*
+-	 * We need sane value in the cpu->perf_limits, so inherit from global
+-	 * perf_limits limits, which are seeded with values based on the
+-	 * CONFIG_CPU_FREQ_DEFAULT_GOV_*, during boot up.
+-	 */
+ 	if (per_cpu_limits)
+-		memcpy(cpu->perf_limits, limits, sizeof(struct perf_limits));
++		intel_pstate_init_limits(cpu->perf_limits);
+ 
+ 	policy->min = cpu->pstate.min_pstate * cpu->pstate.scaling;
+ 	policy->max = cpu->pstate.turbo_pstate * cpu->pstate.scaling;
+@@ -2142,7 +2211,7 @@
+ 	/* cpuinfo and default policy values */
+ 	policy->cpuinfo.min_freq = cpu->pstate.min_pstate * cpu->pstate.scaling;
+ 	update_turbo_state();
+-	policy->cpuinfo.max_freq = limits->turbo_disabled ?
++	policy->cpuinfo.max_freq = global.turbo_disabled ?
+ 			cpu->pstate.max_pstate : cpu->pstate.turbo_pstate;
+ 	policy->cpuinfo.max_freq *= cpu->pstate.scaling;
+ 
+@@ -2162,7 +2231,7 @@
+ 		return ret;
+ 
+ 	policy->cpuinfo.transition_latency = CPUFREQ_ETERNAL;
+-	if (limits->min_perf_pct == 100 && limits->max_perf_pct == 100)
++	if (IS_ENABLED(CONFIG_CPU_FREQ_DEFAULT_GOV_PERFORMANCE))
+ 		policy->policy = CPUFREQ_POLICY_PERFORMANCE;
+ 	else
+ 		policy->policy = CPUFREQ_POLICY_POWERSAVE;
+@@ -2186,46 +2255,16 @@
+ static int intel_cpufreq_verify_policy(struct cpufreq_policy *policy)
+ {
+ 	struct cpudata *cpu = all_cpu_data[policy->cpu];
+-	struct perf_limits *perf_limits = limits;
+ 
+ 	update_turbo_state();
+-	policy->cpuinfo.max_freq = limits->turbo_disabled ?
++	policy->cpuinfo.max_freq = global.no_turbo || global.turbo_disabled ?
+ 			cpu->pstate.max_freq : cpu->pstate.turbo_freq;
+ 
+ 	cpufreq_verify_within_cpu_limits(policy);
+ 
+-	if (per_cpu_limits)
+-		perf_limits = cpu->perf_limits;
+-
+-	mutex_lock(&intel_pstate_limits_lock);
+-
+-	intel_pstate_update_perf_limits(policy, perf_limits);
+-
+-	mutex_unlock(&intel_pstate_limits_lock);
+-
+ 	return 0;
+ }
+ 
+-static unsigned int intel_cpufreq_turbo_update(struct cpudata *cpu,
+-					       struct cpufreq_policy *policy,
+-					       unsigned int target_freq)
+-{
+-	unsigned int max_freq;
+-
+-	update_turbo_state();
+-
+-	max_freq = limits->no_turbo || limits->turbo_disabled ?
+-			cpu->pstate.max_freq : cpu->pstate.turbo_freq;
+-	policy->cpuinfo.max_freq = max_freq;
+-	if (policy->max > max_freq)
+-		policy->max = max_freq;
+-
+-	if (target_freq > max_freq)
+-		target_freq = max_freq;
+-
+-	return target_freq;
+-}
+-
+ static int intel_cpufreq_target(struct cpufreq_policy *policy,
+ 				unsigned int target_freq,
+ 				unsigned int relation)
+@@ -2234,8 +2273,10 @@
+ 	struct cpufreq_freqs freqs;
+ 	int target_pstate;
+ 
++	update_turbo_state();
++
+ 	freqs.old = policy->cur;
+-	freqs.new = intel_cpufreq_turbo_update(cpu, policy, target_freq);
++	freqs.new = target_freq;
+ 
+ 	cpufreq_freq_transition_begin(policy, &freqs);
+ 	switch (relation) {
+@@ -2255,6 +2296,7 @@
+ 		wrmsrl_on_cpu(policy->cpu, MSR_IA32_PERF_CTL,
+ 			      pstate_funcs.get_val(cpu, target_pstate));
+ 	}
++	freqs.new = target_pstate * cpu->pstate.scaling;
+ 	cpufreq_freq_transition_end(policy, &freqs, false);
+ 
+ 	return 0;
+@@ -2266,10 +2308,12 @@
+ 	struct cpudata *cpu = all_cpu_data[policy->cpu];
+ 	int target_pstate;
+ 
+-	target_freq = intel_cpufreq_turbo_update(cpu, policy, target_freq);
++	update_turbo_state();
++
+ 	target_pstate = DIV_ROUND_UP(target_freq, cpu->pstate.scaling);
++	target_pstate = intel_pstate_prepare_request(cpu, target_pstate);
+ 	intel_pstate_update_pstate(cpu, target_pstate);
+-	return target_freq;
++	return target_pstate * cpu->pstate.scaling;
+ }
+ 
+ static int intel_cpufreq_cpu_init(struct cpufreq_policy *policy)
+@@ -2299,6 +2343,113 @@
+ 
+ static struct cpufreq_driver *intel_pstate_driver = &intel_pstate;
+ 
++static void intel_pstate_driver_cleanup(void)
++{
++	unsigned int cpu;
++
++	get_online_cpus();
++	for_each_online_cpu(cpu) {
++		if (all_cpu_data[cpu]) {
++			if (intel_pstate_driver == &intel_pstate)
++				intel_pstate_clear_update_util_hook(cpu);
++
++			kfree(all_cpu_data[cpu]);
++			all_cpu_data[cpu] = NULL;
++		}
++	}
++	put_online_cpus();
++}
++
++static int intel_pstate_register_driver(void)
++{
++	int ret;
++
++	intel_pstate_init_limits(&global);
++
++	ret = cpufreq_register_driver(intel_pstate_driver);
++	if (ret) {
++		intel_pstate_driver_cleanup();
++		return ret;
++	}
++
++	mutex_lock(&intel_pstate_limits_lock);
++	driver_registered = true;
++	mutex_unlock(&intel_pstate_limits_lock);
++
++	if (intel_pstate_driver == &intel_pstate && !hwp_active &&
++	    pstate_funcs.get_target_pstate != get_target_pstate_use_cpu_load)
++		intel_pstate_debug_expose_params();
++
++	return 0;
++}
++
++static int intel_pstate_unregister_driver(void)
++{
++	if (hwp_active)
++		return -EBUSY;
++
++	if (intel_pstate_driver == &intel_pstate && !hwp_active &&
++	    pstate_funcs.get_target_pstate != get_target_pstate_use_cpu_load)
++		intel_pstate_debug_hide_params();
++
++	mutex_lock(&intel_pstate_limits_lock);
++	driver_registered = false;
++	mutex_unlock(&intel_pstate_limits_lock);
++
++	cpufreq_unregister_driver(intel_pstate_driver);
++	intel_pstate_driver_cleanup();
++
++	return 0;
++}
++
++static ssize_t intel_pstate_show_status(char *buf)
++{
++	if (!driver_registered)
++		return sprintf(buf, "off\n");
++
++	return sprintf(buf, "%s\n", intel_pstate_driver == &intel_pstate ?
++					"active" : "passive");
++}
++
++static int intel_pstate_update_status(const char *buf, size_t size)
++{
++	int ret;
++
++	if (size == 3 && !strncmp(buf, "off", size))
++		return driver_registered ?
++					intel_pstate_unregister_driver() : -EINVAL;
++
++	if (size == 6 && !strncmp(buf, "active", size)) {
++		if (driver_registered) {
++			if (intel_pstate_driver == &intel_pstate)
++				return 0;
++
++			ret = intel_pstate_unregister_driver();
++			if (ret)
++				return ret;
++		}
++
++		intel_pstate_driver = &intel_pstate;
++		return intel_pstate_register_driver();
++	}
++
++	if (size == 7 && !strncmp(buf, "passive", size)) {
++		if (driver_registered) {
++			if (intel_pstate_driver != &intel_pstate)
++				return 0;
++
++			ret = intel_pstate_unregister_driver();
++			if (ret)
++				return ret;
++		}
++
++		intel_pstate_driver = &intel_cpufreq;
++		return intel_pstate_register_driver();
++	}
++
++	return -EINVAL;
++}
++
+ static int no_load __initdata;
+ static int no_hwp __initdata;
+ static int hwp_only __initdata;
+@@ -2486,9 +2637,9 @@
+ 
+ static int __init intel_pstate_init(void)
+ {
+-	int cpu, rc = 0;
+ 	const struct x86_cpu_id *id;
+ 	struct cpu_defaults *cpu_def;
++	int rc = 0;
+ 
+ 	if (no_load)
+ 		return -ENODEV;
+@@ -2520,45 +2671,29 @@
+ 	if (intel_pstate_platform_pwr_mgmt_exists())
+ 		return -ENODEV;
+ 
++	if (!hwp_active && hwp_only)
++		return -ENOTSUPP;
++
+ 	pr_info("Intel P-state driver initializing\n");
+ 
+ 	all_cpu_data = vzalloc(sizeof(void *) * num_possible_cpus());
+ 	if (!all_cpu_data)
+ 		return -ENOMEM;
+ 
+-	if (!hwp_active && hwp_only)
+-		goto out;
+-
+ 	intel_pstate_request_control_from_smm();
+ 
+-	rc = cpufreq_register_driver(intel_pstate_driver);
+-	if (rc)
+-		goto out;
+-
+-	if (intel_pstate_driver == &intel_pstate && !hwp_active &&
+-	    pstate_funcs.get_target_pstate != get_target_pstate_use_cpu_load)
+-		intel_pstate_debug_expose_params();
+-
+ 	intel_pstate_sysfs_expose_params();
+ 
++	mutex_lock(&intel_pstate_driver_lock);
++	rc = intel_pstate_register_driver();
++	mutex_unlock(&intel_pstate_driver_lock);
++	if (rc)
++		return rc;
++
+ 	if (hwp_active)
+ 		pr_info("HWP enabled\n");
+ 
+-	return rc;
+-out:
+-	get_online_cpus();
+-	for_each_online_cpu(cpu) {
+-		if (all_cpu_data[cpu]) {
+-			if (intel_pstate_driver == &intel_pstate)
+-				intel_pstate_clear_update_util_hook(cpu);
+-
+-			kfree(all_cpu_data[cpu]);
+-		}
+-	}
+-
+-	put_online_cpus();
+-	vfree(all_cpu_data);
+-	return -ENODEV;
++	return 0;
+ }
+ device_initcall(intel_pstate_init);
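For reference, the new "status" attribute wired up above accepts exactly the strings "off", "active" and "passive" (see intel_pstate_update_status() in the hunk for the driver registration code). A minimal user-space sketch of driving it from C — the sysfs path is the driver's standard /sys/devices/system/cpu/intel_pstate/ location, the program itself is an illustration and not part of the patch, and it needs root:

	#include <stdio.h>

	/* Switch intel_pstate between operation modes via the sysfs
	 * "status" attribute introduced by this patch.  Valid strings,
	 * per intel_pstate_update_status(): "off", "active", "passive".
	 * A trailing newline is optional; store_status() trims at '\n'. */
	static int set_pstate_status(const char *mode)
	{
		FILE *f = fopen("/sys/devices/system/cpu/intel_pstate/status", "w");

		if (!f)
			return -1;
		if (fputs(mode, f) == EOF) {
			fclose(f);
			return -1;
		}
		return fclose(f);	/* fclose() flushes; returns 0 on success */
	}

	int main(void)
	{
		/* Hand P-state selection to the generic cpufreq governors. */
		if (set_pstate_status("passive")) {
			perror("status");
			return 1;
		}
		return 0;
	}

Reading the same file back returns the current mode, and writing "off" only succeeds when HWP is not active, matching the -EBUSY check in intel_pstate_unregister_driver().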