Diffstat (limited to 'intel-pstate-backport.patch')
-rw-r--r--  intel-pstate-backport.patch | 1231
1 file changed, 1100 insertions(+), 131 deletions(-)
diff --git a/intel-pstate-backport.patch b/intel-pstate-backport.patch
index 9e466e174119..cf1427867261 100644
--- a/intel-pstate-backport.patch
+++ b/intel-pstate-backport.patch
@@ -1,148 +1,1117 @@
---- linux-4.8/drivers/cpufreq/intel_pstate.c.orig 2016-10-02 19:24:33.000000000 -0400
-+++ linux-4.8/drivers/cpufreq/intel_pstate.c 2016-10-09 19:32:01.073141319 -0400
-@@ -181,6 +181,8 @@
- * @cpu: CPU number for this instance data
- * @update_util: CPUFreq utility callback information
- * @update_util_set: CPUFreq utility callback is set
-+ * @iowait_boost: iowait-related boost fraction
-+ * @last_update: Time of the last update.
- * @pstate: Stores P state limits for this CPU
- * @vid: Stores VID limits for this CPU
- * @pid: Stores PID parameters for this CPU
-@@ -206,6 +208,7 @@
- struct vid_data vid;
- struct _pid pid;
-
-+ u64 last_update;
- u64 last_sample_time;
- u64 prev_aperf;
- u64 prev_mperf;
-@@ -216,6 +219,7 @@
- struct acpi_processor_performance acpi_perf_data;
- bool valid_pss_table;
- #endif
-+ unsigned int iowait_boost;
- };
+--- drivers/cpufreq/intel_pstate.c.orig 2017-02-19 17:34:00.000000000 -0500
++++ drivers/cpufreq/intel_pstate.c 2017-03-23 23:47:19.095511820 -0400
+@@ -19,7 +19,7 @@
+ #include <linux/hrtimer.h>
+ #include <linux/tick.h>
+ #include <linux/slab.h>
+-#include <linux/sched.h>
++#include <linux/sched/cpufreq.h>
+ #include <linux/list.h>
+ #include <linux/cpu.h>
+ #include <linux/cpufreq.h>
+@@ -39,11 +39,6 @@
- static struct cpudata **all_cpu_data;
-@@ -229,6 +233,7 @@
- * @p_gain_pct: PID proportional gain
- * @i_gain_pct: PID integral gain
- * @d_gain_pct: PID derivative gain
-+ * @boost_iowait: Whether or not to use iowait boosting.
- *
- * Stores per CPU model static PID configuration data.
- */
-@@ -240,6 +245,7 @@
- int p_gain_pct;
- int d_gain_pct;
- int i_gain_pct;
-+ bool boost_iowait;
- };
+ #define INTEL_CPUFREQ_TRANSITION_LATENCY 20000
+
+-#define ATOM_RATIOS 0x66a
+-#define ATOM_VIDS 0x66b
+-#define ATOM_TURBO_RATIOS 0x66c
+-#define ATOM_TURBO_VIDS 0x66d
+-
+ #ifdef CONFIG_ACPI
+ #include <acpi/processor.h>
+ #include <acpi/cppc_acpi.h>
+@@ -89,6 +84,11 @@
+ return div64_u64(x << EXT_FRAC_BITS, y);
+ }
++static inline int32_t percent_ext_fp(int percent)
++{
++ return div_ext_fp(percent, 100);
++}
++
 /**
-@@ -1029,7 +1035,7 @@
- },
- };
+ * struct sample - Store performance sample
+ * @core_avg_perf: Ratio of APERF/MPERF which is the actual average
+@@ -358,42 +358,24 @@
+ static int hwp_active __read_mostly;
+ static bool per_cpu_limits __read_mostly;
--static struct cpu_defaults silvermont_params = {
-+static const struct cpu_defaults silvermont_params = {
- .pid_policy = {
- .sample_rate_ms = 10,
- .deadband = 0,
-@@ -1037,6 +1043,7 @@
- .p_gain_pct = 14,
- .d_gain_pct = 0,
- .i_gain_pct = 4,
-+ .boost_iowait = true,
- },
- .funcs = {
- .get_max = atom_get_max_pstate,
-@@ -1050,7 +1057,7 @@
- },
- };
++static bool driver_registered __read_mostly;
++
+ #ifdef CONFIG_ACPI
+ static bool acpi_ppc;
+ #endif
+
+-static struct perf_limits performance_limits = {
+- .no_turbo = 0,
+- .turbo_disabled = 0,
+- .max_perf_pct = 100,
+- .max_perf = int_ext_tofp(1),
+- .min_perf_pct = 100,
+- .min_perf = int_ext_tofp(1),
+- .max_policy_pct = 100,
+- .max_sysfs_pct = 100,
+- .min_policy_pct = 0,
+- .min_sysfs_pct = 0,
+-};
+-
+-static struct perf_limits powersave_limits = {
+- .no_turbo = 0,
+- .turbo_disabled = 0,
+- .max_perf_pct = 100,
+- .max_perf = int_ext_tofp(1),
+- .min_perf_pct = 0,
+- .min_perf = 0,
+- .max_policy_pct = 100,
+- .max_sysfs_pct = 100,
+- .min_policy_pct = 0,
+- .min_sysfs_pct = 0,
+-};
++static struct perf_limits global;
+
+-#ifdef CONFIG_CPU_FREQ_DEFAULT_GOV_PERFORMANCE
+-static struct perf_limits *limits = &performance_limits;
+-#else
+-static struct perf_limits *limits = &powersave_limits;
+-#endif
++static void intel_pstate_init_limits(struct perf_limits *limits)
++{
++ memset(limits, 0, sizeof(*limits));
++ limits->max_perf_pct = 100;
++ limits->max_perf = int_ext_tofp(1);
++ limits->max_policy_pct = 100;
++ limits->max_sysfs_pct = 100;
++}
+
++static DEFINE_MUTEX(intel_pstate_driver_lock);
+ static DEFINE_MUTEX(intel_pstate_limits_lock);
+
+ #ifdef CONFIG_ACPI
+@@ -515,7 +497,7 @@
+ * correct max turbo frequency based on the turbo state.
+ * Also need to convert to MHz as _PSS freq is in MHz.
+ */
+- if (!limits->turbo_disabled)
++ if (!global.turbo_disabled)
+ cpu->acpi_perf_data.states[0].core_frequency =
+ policy->cpuinfo.max_freq / 1000;
+ cpu->valid_pss_table = true;
+@@ -538,7 +520,6 @@
+
+ acpi_processor_unregister_performance(policy->cpu);
+ }
+-
+ #else
+ static inline void intel_pstate_init_acpi_perf_limits(struct cpufreq_policy *policy)
+ {
+@@ -635,7 +616,7 @@
+
+ cpu = all_cpu_data[0];
+ rdmsrl(MSR_IA32_MISC_ENABLE, misc_en);
+- limits->turbo_disabled =
++ global.turbo_disabled =
+ (misc_en & MSR_IA32_MISC_ENABLE_TURBO_DISABLE ||
+ cpu->pstate.max_pstate == cpu->pstate.turbo_pstate);
+ }
+@@ -859,12 +840,11 @@
+
+ static void intel_pstate_hwp_set(struct cpufreq_policy *policy)
+ {
+- int min, hw_min, max, hw_max, cpu, range, adj_range;
+- struct perf_limits *perf_limits = limits;
++ int min, hw_min, max, hw_max, cpu;
++ struct perf_limits *perf_limits = &global;
+ u64 value, cap;
+
+ for_each_cpu(cpu, policy->cpus) {
+- int max_perf_pct, min_perf_pct;
+ struct cpudata *cpu_data = all_cpu_data[cpu];
+ s16 epp;
--static struct cpu_defaults airmont_params = {
-+static const struct cpu_defaults airmont_params = {
- .pid_policy = {
- .sample_rate_ms = 10,
- .deadband = 0,
-@@ -1058,6 +1065,7 @@
- .p_gain_pct = 14,
- .d_gain_pct = 0,
- .i_gain_pct = 4,
-+ .boost_iowait = true,
- },
- .funcs = {
- .get_max = atom_get_max_pstate,
-@@ -1071,7 +1079,7 @@
- },
+@@ -873,26 +853,22 @@
+
+ rdmsrl_on_cpu(cpu, MSR_HWP_CAPABILITIES, &cap);
+ hw_min = HWP_LOWEST_PERF(cap);
+- hw_max = HWP_HIGHEST_PERF(cap);
+- range = hw_max - hw_min;
++ if (global.no_turbo)
++ hw_max = HWP_GUARANTEED_PERF(cap);
++ else
++ hw_max = HWP_HIGHEST_PERF(cap);
+
+- max_perf_pct = perf_limits->max_perf_pct;
+- min_perf_pct = perf_limits->min_perf_pct;
++ max = fp_ext_toint(hw_max * perf_limits->max_perf);
++ if (cpu_data->policy == CPUFREQ_POLICY_PERFORMANCE)
++ min = max;
++ else
++ min = fp_ext_toint(hw_max * perf_limits->min_perf);
+
+ rdmsrl_on_cpu(cpu, MSR_HWP_REQUEST, &value);
+- adj_range = min_perf_pct * range / 100;
+- min = hw_min + adj_range;
++
+ value &= ~HWP_MIN_PERF(~0L);
+ value |= HWP_MIN_PERF(min);
+
+- adj_range = max_perf_pct * range / 100;
+- max = hw_min + adj_range;
+- if (limits->no_turbo) {
+- hw_max = HWP_GUARANTEED_PERF(cap);
+- if (hw_max < max)
+- max = hw_max;
+- }
+-
+ value &= ~HWP_MAX_PERF(~0L);
+ value |= HWP_MAX_PERF(max);
+
+@@ -996,6 +972,7 @@
+ static int pid_param_set(void *data, u64 val)
+ {
+ *(u32 *)data = val;
++ pid_params.sample_rate_ns = pid_params.sample_rate_ms * NSEC_PER_MSEC;
+ intel_pstate_reset_all_pid();
+ return 0;
+ }
+@@ -1007,35 +984,57 @@
+ }
+ DEFINE_SIMPLE_ATTRIBUTE(fops_pid_param, pid_param_get, pid_param_set, "%llu\n");
+
++static struct dentry *debugfs_parent;
++
+ struct pid_param {
+ char *name;
+ void *value;
++ struct dentry *dentry;
 };
--static struct cpu_defaults knl_params = {
-+static const struct cpu_defaults knl_params = {
- .pid_policy = {
- .sample_rate_ms = 10,
- .deadband = 0,
-@@ -1091,7 +1099,7 @@
- },
+ static struct pid_param pid_files[] = {
+- {"sample_rate_ms", &pid_params.sample_rate_ms},
+- {"d_gain_pct", &pid_params.d_gain_pct},
+- {"i_gain_pct", &pid_params.i_gain_pct},
+- {"deadband", &pid_params.deadband},
+- {"setpoint", &pid_params.setpoint},
+- {"p_gain_pct", &pid_params.p_gain_pct},
+- {NULL, NULL}
++ {"sample_rate_ms", &pid_params.sample_rate_ms, },
++ {"d_gain_pct", &pid_params.d_gain_pct, },
++ {"i_gain_pct", &pid_params.i_gain_pct, },
++ {"deadband", &pid_params.deadband, },
++ {"setpoint", &pid_params.setpoint, },
++ {"p_gain_pct", &pid_params.p_gain_pct, },
++ {NULL, NULL, }
 };
--static struct cpu_defaults bxt_params = {
-+static const struct cpu_defaults bxt_params = {
- .pid_policy = {
- .sample_rate_ms = 10,
- .deadband = 0,
-@@ -1099,6 +1107,7 @@
- .p_gain_pct = 14,
- .d_gain_pct = 0,
- .i_gain_pct = 4,
-+ .boost_iowait = true,
- },
- .funcs = {
- .get_max = core_get_max_pstate,
-@@ -1222,36 +1231,18 @@
- static inline int32_t get_target_pstate_use_cpu_load(struct cpudata *cpu)
- {
- struct sample *sample = &cpu->sample;
-- u64 cummulative_iowait, delta_iowait_us;
-- u64 delta_iowait_mperf;
-- u64 mperf, now;
-- int32_t cpu_load;
-+ int32_t busy_frac, boost;
-
-- cummulative_iowait = get_cpu_iowait_time_us(cpu->cpu, &now);
-+ busy_frac = div_fp(sample->mperf, sample->tsc);
+-static void __init intel_pstate_debug_expose_params(void)
++static void intel_pstate_debug_expose_params(void)
+ {
+- struct dentry *debugfs_parent;
+- int i = 0;
++ int i;
+
+ debugfs_parent = debugfs_create_dir("pstate_snb", NULL);
+ if (IS_ERR_OR_NULL(debugfs_parent))
+ return;
+- while (pid_files[i].name) {
+- debugfs_create_file(pid_files[i].name, 0660,
+- debugfs_parent, pid_files[i].value,
+- &fops_pid_param);
+- i++;
++
++ for (i = 0; pid_files[i].name; i++) {
++ struct dentry *dentry;
++
++ dentry = debugfs_create_file(pid_files[i].name, 0660,
++ debugfs_parent, pid_files[i].value,
++ &fops_pid_param);
++ if (!IS_ERR(dentry))
++ pid_files[i].dentry = dentry;
++ }
++}
++
++static void intel_pstate_debug_hide_params(void)
++{
++ int i;
++
++ if (IS_ERR_OR_NULL(debugfs_parent))
++ return;
++
++ for (i = 0; pid_files[i].name; i++) {
++ debugfs_remove(pid_files[i].dentry);
++ pid_files[i].dentry = NULL;
+ }
++
++ debugfs_remove(debugfs_parent);
++ debugfs_parent = NULL;
+ }
-- /*
-- * Convert iowait time into number of IO cycles spent at max_freq.
-- * IO is considered as busy only for the cpu_load algorithm. For
-- * performance this is not needed since we always try to reach the
-- * maximum P-State, so we are already boosting the IOs.
-- */
-- delta_iowait_us = cummulative_iowait - cpu->prev_cummulative_iowait;
-- delta_iowait_mperf = div64_u64(delta_iowait_us * cpu->pstate.scaling *
-- cpu->pstate.max_pstate, MSEC_PER_SEC);
+ /************************** debugfs end ************************/
+@@ -1045,9 +1044,37 @@
+ static ssize_t show_##file_name \
+ (struct kobject *kobj, struct attribute *attr, char *buf) \
+ { \
+- return sprintf(buf, "%u\n", limits->object); \
++ return sprintf(buf, "%u\n", global.object); \
+ }
+
++static ssize_t intel_pstate_show_status(char *buf);
++static int intel_pstate_update_status(const char *buf, size_t size);
++
++static ssize_t show_status(struct kobject *kobj,
++ struct attribute *attr, char *buf)
++{
++ ssize_t ret;
++
++ mutex_lock(&intel_pstate_driver_lock);
++ ret = intel_pstate_show_status(buf);
++ mutex_unlock(&intel_pstate_driver_lock);
++
++ return ret;
++}
++
++static ssize_t store_status(struct kobject *a, struct attribute *b,
++ const char *buf, size_t count)
++{
++ char *p = memchr(buf, '\n', count);
++ int ret;
++
++ mutex_lock(&intel_pstate_driver_lock);
++ ret = intel_pstate_update_status(buf, p ? p - buf : count);
++ mutex_unlock(&intel_pstate_driver_lock);
++
++ return ret < 0 ? ret : count;
++}
++
+ static ssize_t show_turbo_pct(struct kobject *kobj,
+ struct attribute *attr, char *buf)
+ {
+@@ -1055,12 +1082,22 @@
+ int total, no_turbo, turbo_pct;
+ uint32_t turbo_fp;
+
++ mutex_lock(&intel_pstate_driver_lock);
++
++ if (!driver_registered) {
++ mutex_unlock(&intel_pstate_driver_lock);
++ return -EAGAIN;
++ }
++
+ cpu = all_cpu_data[0];
+
+ total = cpu->pstate.turbo_pstate - cpu->pstate.min_pstate + 1;
+ no_turbo = cpu->pstate.max_pstate - cpu->pstate.min_pstate + 1;
+ turbo_fp = div_fp(no_turbo, total);
+ turbo_pct = 100 - fp_toint(mul_fp(turbo_fp, int_tofp(100)));
++
++ mutex_unlock(&intel_pstate_driver_lock);
++
+ return sprintf(buf, "%u\n", turbo_pct);
+ }
+
+@@ -1070,8 +1107,18 @@
+ struct cpudata *cpu;
+ int total;
+
++ mutex_lock(&intel_pstate_driver_lock);
++
++ if (!driver_registered) {
++ mutex_unlock(&intel_pstate_driver_lock);
++ return -EAGAIN;
++ }
++
+ cpu = all_cpu_data[0];
+ total = cpu->pstate.turbo_pstate - cpu->pstate.min_pstate + 1;
++
++ mutex_unlock(&intel_pstate_driver_lock);
++
+ return sprintf(buf, "%u\n", total);
+ }
+
+@@ -1080,11 +1127,20 @@
+ {
+ ssize_t ret;
+
++ mutex_lock(&intel_pstate_driver_lock);
++
++ if (!driver_registered) {
++ mutex_unlock(&intel_pstate_driver_lock);
++ return -EAGAIN;
++ }
++
+ update_turbo_state();
+- if (limits->turbo_disabled)
+- ret = sprintf(buf, "%u\n", limits->turbo_disabled);
++ if (global.turbo_disabled)
++ ret = sprintf(buf, "%u\n", global.turbo_disabled);
+ else
+- ret = sprintf(buf, "%u\n", limits->no_turbo);
++ ret = sprintf(buf, "%u\n", global.no_turbo);
++
++ mutex_unlock(&intel_pstate_driver_lock);
+
+ return ret;
+ }
+@@ -1099,21 +1155,31 @@
+ if (ret != 1)
+ return -EINVAL;
+
++ mutex_lock(&intel_pstate_driver_lock);
++
++ if (!driver_registered) {
++ mutex_unlock(&intel_pstate_driver_lock);
++ return -EAGAIN;
++ }
++
+ mutex_lock(&intel_pstate_limits_lock);
+
+ update_turbo_state();
+- if (limits->turbo_disabled) {
++ if (global.turbo_disabled) {
+ pr_warn("Turbo disabled by BIOS or unavailable on processor\n");
+ mutex_unlock(&intel_pstate_limits_lock);
++ mutex_unlock(&intel_pstate_driver_lock);
+ return -EPERM;
+ }
+
+- limits->no_turbo = clamp_t(int, input, 0, 1);
++ global.no_turbo = clamp_t(int, input, 0, 1);
+
+ mutex_unlock(&intel_pstate_limits_lock);
+
+ intel_pstate_update_policies();
+
++ mutex_unlock(&intel_pstate_driver_lock);
++
+ return count;
+ }
+
+@@ -1127,21 +1193,27 @@
+ if (ret != 1)
+ return -EINVAL;
+
++ mutex_lock(&intel_pstate_driver_lock);
++
++ if (!driver_registered) {
++ mutex_unlock(&intel_pstate_driver_lock);
++ return -EAGAIN;
++ }
++
+ mutex_lock(&intel_pstate_limits_lock);
+
+- limits->max_sysfs_pct = clamp_t(int, input, 0 , 100);
+- limits->max_perf_pct = min(limits->max_policy_pct,
+- limits->max_sysfs_pct);
+- limits->max_perf_pct = max(limits->min_policy_pct,
+- limits->max_perf_pct);
+- limits->max_perf_pct = max(limits->min_perf_pct,
+- limits->max_perf_pct);
+- limits->max_perf = div_ext_fp(limits->max_perf_pct, 100);
++ global.max_sysfs_pct = clamp_t(int, input, 0 , 100);
++ global.max_perf_pct = min(global.max_policy_pct, global.max_sysfs_pct);
++ global.max_perf_pct = max(global.min_policy_pct, global.max_perf_pct);
++ global.max_perf_pct = max(global.min_perf_pct, global.max_perf_pct);
++ global.max_perf = percent_ext_fp(global.max_perf_pct);
+
+ mutex_unlock(&intel_pstate_limits_lock);
+
+ intel_pstate_update_policies();
+
++ mutex_unlock(&intel_pstate_driver_lock);
++
+ return count;
+ }
+
+@@ -1155,27 +1227,34 @@
+ if (ret != 1)
+ return -EINVAL;
+
++ mutex_lock(&intel_pstate_driver_lock);
++
++ if (!driver_registered) {
++ mutex_unlock(&intel_pstate_driver_lock);
++ return -EAGAIN;
++ }
++
+ mutex_lock(&intel_pstate_limits_lock);
+
+- limits->min_sysfs_pct = clamp_t(int, input, 0 , 100);
+- limits->min_perf_pct = max(limits->min_policy_pct,
+- limits->min_sysfs_pct);
+- limits->min_perf_pct = min(limits->max_policy_pct,
+- limits->min_perf_pct);
+- limits->min_perf_pct = min(limits->max_perf_pct,
+- limits->min_perf_pct);
+- limits->min_perf = div_ext_fp(limits->min_perf_pct, 100);
++ global.min_sysfs_pct = clamp_t(int, input, 0 , 100);
++ global.min_perf_pct = max(global.min_policy_pct, global.min_sysfs_pct);
++ global.min_perf_pct = min(global.max_policy_pct, global.min_perf_pct);
++ global.min_perf_pct = min(global.max_perf_pct, global.min_perf_pct);
++ global.min_perf = percent_ext_fp(global.min_perf_pct);
+
+ mutex_unlock(&intel_pstate_limits_lock);
+
+ intel_pstate_update_policies();
+
++ mutex_unlock(&intel_pstate_driver_lock);
++
+ return count;
+ }
+
+ show_one(max_perf_pct, max_perf_pct);
+ show_one(min_perf_pct, min_perf_pct);
+
++define_one_global_rw(status);
+ define_one_global_rw(no_turbo);
+ define_one_global_rw(max_perf_pct);
+ define_one_global_rw(min_perf_pct);
+@@ -1183,6 +1262,7 @@
+ define_one_global_ro(num_pstates);
+
+ static struct attribute *intel_pstate_attributes[] = {
++ &status.attr,
+ &no_turbo.attr,
+ &turbo_pct.attr,
+ &num_pstates.attr,
+@@ -1258,7 +1338,7 @@
+ {
+ u64 value;
+
+- rdmsrl(ATOM_RATIOS, value);
++ rdmsrl(MSR_ATOM_CORE_RATIOS, value);
+ return (value >> 8) & 0x7F;
+ }
+
+@@ -1266,7 +1346,7 @@
+ {
+ u64 value;
+
+- rdmsrl(ATOM_RATIOS, value);
++ rdmsrl(MSR_ATOM_CORE_RATIOS, value);
+ return (value >> 16) & 0x7F;
+ }
+
+@@ -1274,7 +1354,7 @@
+ {
+ u64 value;
+
+- rdmsrl(ATOM_TURBO_RATIOS, value);
++ rdmsrl(MSR_ATOM_CORE_TURBO_RATIOS, value);
+ return value & 0x7F;
+ }
+
+@@ -1285,7 +1365,7 @@
+ u32 vid;
+
+ val = (u64)pstate << 8;
+- if (limits->no_turbo && !limits->turbo_disabled)
++ if (global.no_turbo && !global.turbo_disabled)
+ val |= (u64)1 << 32;
+
+ vid_fp = cpudata->vid.min + mul_fp(
+@@ -1336,7 +1416,7 @@
+ {
+ u64 value;
+
+- rdmsrl(ATOM_VIDS, value);
++ rdmsrl(MSR_ATOM_CORE_VIDS, value);
+ cpudata->vid.min = int_tofp((value >> 8) & 0x7f);
+ cpudata->vid.max = int_tofp((value >> 16) & 0x7f);
+ cpudata->vid.ratio = div_fp(
+@@ -1344,7 +1424,7 @@
+ int_tofp(cpudata->pstate.max_pstate -
+ cpudata->pstate.min_pstate));
+
+- rdmsrl(ATOM_TURBO_VIDS, value);
++ rdmsrl(MSR_ATOM_CORE_TURBO_VIDS, value);
+ cpudata->vid.turbo = value & 0x7f;
+ }
+
+@@ -1364,48 +1444,71 @@
+ return (value >> 8) & 0xFF;
+ }
+
++static int core_get_tdp_ratio(u64 plat_info)
++{
++ /* Check how many TDP levels present */
++ if (plat_info & 0x600000000) {
++ u64 tdp_ctrl;
++ u64 tdp_ratio;
++ int tdp_msr;
++ int err;
++
++ /* Get the TDP level (0, 1, 2) to get ratios */
++ err = rdmsrl_safe(MSR_CONFIG_TDP_CONTROL, &tdp_ctrl);
++ if (err)
++ return err;
++
++ /* TDP MSR are continuous starting at 0x648 */
++ tdp_msr = MSR_CONFIG_TDP_NOMINAL + (tdp_ctrl & 0x03);
++ err = rdmsrl_safe(tdp_msr, &tdp_ratio);
++ if (err)
++ return err;
++
++ /* For level 1 and 2, bits[23:16] contain the ratio */
++ if (tdp_ctrl & 0x03)
++ tdp_ratio >>= 16;
++
++ tdp_ratio &= 0xff; /* ratios are only 8 bits long */
++ pr_debug("tdp_ratio %x\n", (int)tdp_ratio);
++
++ return (int)tdp_ratio;
++ }
++
++ return -ENXIO;
++}
++
+ static int core_get_max_pstate(void)
+ {
+ u64 tar;
+ u64 plat_info;
+ int max_pstate;
++ int tdp_ratio;
+ int err;
+
+ rdmsrl(MSR_PLATFORM_INFO, plat_info);
+ max_pstate = (plat_info >> 8) & 0xFF;
+
++ tdp_ratio = core_get_tdp_ratio(plat_info);
++ if (tdp_ratio <= 0)
++ return max_pstate;
++
++ if (hwp_active) {
++ /* Turbo activation ratio is not used on HWP platforms */
++ return tdp_ratio;
++ }
++
+ err = rdmsrl_safe(MSR_TURBO_ACTIVATION_RATIO, &tar);
+ if (!err) {
++ int tar_levels;
++
+ /* Do some sanity checking for safety */
+- if (plat_info & 0x600000000) {
+- u64 tdp_ctrl;
+- u64 tdp_ratio;
+- int tdp_msr;
+-
+- err = rdmsrl_safe(MSR_CONFIG_TDP_CONTROL, &tdp_ctrl);
+- if (err)
+- goto skip_tar;
+-
+- tdp_msr = MSR_CONFIG_TDP_NOMINAL + (tdp_ctrl & 0x3);
+- err = rdmsrl_safe(tdp_msr, &tdp_ratio);
+- if (err)
+- goto skip_tar;
+-
+- /* For level 1 and 2, bits[23:16] contain the ratio */
+- if (tdp_ctrl)
+- tdp_ratio >>= 16;
-
-- mperf = cpu->sample.mperf + delta_iowait_mperf;
-- cpu->prev_cummulative_iowait = cummulative_iowait;
-+ boost = cpu->iowait_boost;
-+ cpu->iowait_boost >>= 1;
+- tdp_ratio &= 0xff; /* ratios are only 8 bits long */
+- if (tdp_ratio - 1 == tar) {
+- max_pstate = tar;
+- pr_debug("max_pstate=TAC %x\n", max_pstate);
+- } else {
+- goto skip_tar;
+- }
++ tar_levels = tar & 0xff;
++ if (tdp_ratio - 1 == tar_levels) {
++ max_pstate = tar_levels;
++ pr_debug("max_pstate=TAC %x\n", max_pstate);
+ }
+ }
+
+-skip_tar:
+ return max_pstate;
+ }
+
+@@ -1432,7 +1535,7 @@
+ u64 val;
+
+ val = (u64)pstate << 8;
+- if (limits->no_turbo && !limits->turbo_disabled)
++ if (global.no_turbo && !global.turbo_disabled)
+ val |= (u64)1 << 32;
+
+ return val;
+@@ -1558,9 +1661,9 @@
+ int max_perf = cpu->pstate.turbo_pstate;
+ int max_perf_adj;
+ int min_perf;
+- struct perf_limits *perf_limits = limits;
++ struct perf_limits *perf_limits = &global;
+
+- if (limits->no_turbo || limits->turbo_disabled)
++ if (global.no_turbo || global.turbo_disabled)
+ max_perf = cpu->pstate.max_pstate;
+
+ if (per_cpu_limits)
+@@ -1695,7 +1798,7 @@
+
+ sample->busy_scaled = busy_frac * 100;
+
+- target = limits->no_turbo || limits->turbo_disabled ?
++ target = global.no_turbo || global.turbo_disabled ?
+ cpu->pstate.max_pstate : cpu->pstate.turbo_pstate;
+ target += target >> 2;
+ target = mul_fp(target, busy_frac);
+@@ -1759,13 +1862,11 @@
+
+ intel_pstate_get_min_max(cpu, &min_perf, &max_perf);
+ pstate = clamp_t(int, pstate, min_perf, max_perf);
+- trace_cpu_frequency(pstate * cpu->pstate.scaling, cpu->cpu);
+ return pstate;
+ }
+
+ static void intel_pstate_update_pstate(struct cpudata *cpu, int pstate)
+ {
+- pstate = intel_pstate_prepare_request(cpu, pstate);
+ if (pstate == cpu->pstate.current_pstate)
+ return;
+
+@@ -1785,6 +1886,8 @@
+
+ update_turbo_state();
+
++ target_pstate = intel_pstate_prepare_request(cpu, target_pstate);
++ trace_cpu_frequency(target_pstate * cpu->pstate.scaling, cpu->cpu);
+ intel_pstate_update_pstate(cpu, target_pstate);
+
+ sample = &cpu->sample;
+@@ -1952,53 +2055,37 @@
+ synchronize_sched();
+ }
+
+-static void intel_pstate_set_performance_limits(struct perf_limits *limits)
+-{
+- limits->no_turbo = 0;
+- limits->turbo_disabled = 0;
+- limits->max_perf_pct = 100;
+- limits->max_perf = int_ext_tofp(1);
+- limits->min_perf_pct = 100;
+- limits->min_perf = int_ext_tofp(1);
+- limits->max_policy_pct = 100;
+- limits->max_sysfs_pct = 100;
+- limits->min_policy_pct = 0;
+- limits->min_sysfs_pct = 0;
+-}
+-
+ static void intel_pstate_update_perf_limits(struct cpufreq_policy *policy,
+ struct perf_limits *limits)
+ {
++ int32_t max_policy_perf, min_policy_perf;
+
+- limits->max_policy_pct = DIV_ROUND_UP(policy->max * 100,
+- policy->cpuinfo.max_freq);
+- limits->max_policy_pct = clamp_t(int, limits->max_policy_pct, 0, 100);
++ max_policy_perf = div_ext_fp(policy->max, policy->cpuinfo.max_freq);
++ max_policy_perf = clamp_t(int32_t, max_policy_perf, 0, int_ext_tofp(1));
+ if (policy->max == policy->min) {
+- limits->min_policy_pct = limits->max_policy_pct;
++ min_policy_perf = max_policy_perf;
+ } else {
+- limits->min_policy_pct = DIV_ROUND_UP(policy->min * 100,
+- policy->cpuinfo.max_freq);
+- limits->min_policy_pct = clamp_t(int, limits->min_policy_pct,
+- 0, 100);
+- }
+-
+- /* Normalize user input to [min_policy_pct, max_policy_pct] */
+- limits->min_perf_pct = max(limits->min_policy_pct,
+- limits->min_sysfs_pct);
+- limits->min_perf_pct = min(limits->max_policy_pct,
+- limits->min_perf_pct);
+- limits->max_perf_pct = min(limits->max_policy_pct,
+- limits->max_sysfs_pct);
+- limits->max_perf_pct = max(limits->min_policy_pct,
+- limits->max_perf_pct);
++ min_policy_perf = div_ext_fp(policy->min,
++ policy->cpuinfo.max_freq);
++ min_policy_perf = clamp_t(int32_t, min_policy_perf,
++ 0, max_policy_perf);
++ }
++
++ /* Normalize user input to [min_perf, max_perf] */
++ limits->min_perf = max(min_policy_perf,
++ percent_ext_fp(limits->min_sysfs_pct));
++ limits->min_perf = min(limits->min_perf, max_policy_perf);
++ limits->max_perf = min(max_policy_perf,
++ percent_ext_fp(limits->max_sysfs_pct));
++ limits->max_perf = max(min_policy_perf, limits->max_perf);
+
+- /* Make sure min_perf_pct <= max_perf_pct */
+- limits->min_perf_pct = min(limits->max_perf_pct, limits->min_perf_pct);
++ /* Make sure min_perf <= max_perf */
++ limits->min_perf = min(limits->min_perf, limits->max_perf);
+
+- limits->min_perf = div_ext_fp(limits->min_perf_pct, 100);
+- limits->max_perf = div_ext_fp(limits->max_perf_pct, 100);
+ limits->max_perf = round_up(limits->max_perf, EXT_FRAC_BITS);
+ limits->min_perf = round_up(limits->min_perf, EXT_FRAC_BITS);
++ limits->max_perf_pct = fp_ext_toint(limits->max_perf * 100);
++ limits->min_perf_pct = fp_ext_toint(limits->min_perf * 100);
+
+ pr_debug("cpu:%d max_perf_pct:%d min_perf_pct:%d\n", policy->cpu,
+ limits->max_perf_pct, limits->min_perf_pct);
+@@ -2007,7 +2094,7 @@
+ static int intel_pstate_set_policy(struct cpufreq_policy *policy)
+ {
+ struct cpudata *cpu;
+- struct perf_limits *perf_limits = NULL;
++ struct perf_limits *perf_limits = &global;
+
+ if (!policy->cpuinfo.max_freq)
+ return -ENODEV;
+@@ -2030,28 +2117,8 @@
+
+ mutex_lock(&intel_pstate_limits_lock);
+
+- if (policy->policy == CPUFREQ_POLICY_PERFORMANCE) {
+- if (!perf_limits) {
+- limits = &performance_limits;
+- perf_limits = limits;
+- }
+- if (policy->max >= policy->cpuinfo.max_freq &&
+- !limits->no_turbo) {
+- pr_debug("set performance\n");
+- intel_pstate_set_performance_limits(perf_limits);
+- goto out;
+- }
+- } else {
+- pr_debug("set powersave\n");
+- if (!perf_limits) {
+- limits = &powersave_limits;
+- perf_limits = limits;
+- }
+-
+- }
+-
+ intel_pstate_update_perf_limits(policy, perf_limits);
+- out:
++
+ if (cpu->policy == CPUFREQ_POLICY_PERFORMANCE) {
+ /*
+ * NOHZ_FULL CPUs need this as the governor callback may not
+@@ -2072,6 +2139,13 @@
+
+ static int intel_pstate_verify_policy(struct cpufreq_policy *policy)
+ {
++ struct cpudata *cpu = all_cpu_data[policy->cpu];
++
++ update_turbo_state();
++ policy->cpuinfo.max_freq = global.turbo_disabled || global.no_turbo ?
++ cpu->pstate.max_freq :
++ cpu->pstate.turbo_freq;
++
+ cpufreq_verify_within_cpu_limits(policy);
+
+ if (policy->policy != CPUFREQ_POLICY_POWERSAVE &&
+@@ -2083,9 +2157,9 @@
+ unsigned int max_freq, min_freq;
+
+ max_freq = policy->cpuinfo.max_freq *
+- limits->max_sysfs_pct / 100;
++ global.max_sysfs_pct / 100;
+ min_freq = policy->cpuinfo.max_freq *
+- limits->min_sysfs_pct / 100;
++ global.min_sysfs_pct / 100;
+ cpufreq_verify_within_limits(policy, min_freq, max_freq);
+ }
+
+@@ -2128,13 +2202,8 @@
+
+ cpu = all_cpu_data[policy->cpu];
- /*
-- * The load can be estimated as the ratio of the mperf counter
-- * running at a constant frequency during active periods
-- * (C0) and the time stamp counter running at the same frequency
-- * also during C-states.
+- * We need sane value in the cpu->perf_limits, so inherit from global
+- * perf_limits limits, which are seeded with values based on the
+- * CONFIG_CPU_FREQ_DEFAULT_GOV_*, during boot up.
- */
-- cpu_load = div64_u64(int_tofp(100) * mperf, sample->tsc);
-- cpu->sample.busy_scaled = cpu_load;
-+ if (busy_frac < boost)
-+ busy_frac = boost;
+ if (per_cpu_limits)
+- memcpy(cpu->perf_limits, limits, sizeof(struct perf_limits));
++ intel_pstate_init_limits(cpu->perf_limits);
+
+ policy->min = cpu->pstate.min_pstate * cpu->pstate.scaling;
+ policy->max = cpu->pstate.turbo_pstate * cpu->pstate.scaling;
+@@ -2142,7 +2211,7 @@
+ /* cpuinfo and default policy values */
+ policy->cpuinfo.min_freq = cpu->pstate.min_pstate * cpu->pstate.scaling;
+ update_turbo_state();
+- policy->cpuinfo.max_freq = limits->turbo_disabled ?
++ policy->cpuinfo.max_freq = global.turbo_disabled ?
+ cpu->pstate.max_pstate : cpu->pstate.turbo_pstate;
+ policy->cpuinfo.max_freq *= cpu->pstate.scaling;
+
+@@ -2162,7 +2231,7 @@
+ return ret;
+
+ policy->cpuinfo.transition_latency = CPUFREQ_ETERNAL;
+- if (limits->min_perf_pct == 100 && limits->max_perf_pct == 100)
++ if (IS_ENABLED(CONFIG_CPU_FREQ_DEFAULT_GOV_PERFORMANCE))
+ policy->policy = CPUFREQ_POLICY_PERFORMANCE;
+ else
+ policy->policy = CPUFREQ_POLICY_POWERSAVE;
+@@ -2186,46 +2255,16 @@
+ static int intel_cpufreq_verify_policy(struct cpufreq_policy *policy)
+ {
+ struct cpudata *cpu = all_cpu_data[policy->cpu];
+- struct perf_limits *perf_limits = limits;
+
+ update_turbo_state();
+- policy->cpuinfo.max_freq = limits->turbo_disabled ?
++ policy->cpuinfo.max_freq = global.no_turbo || global.turbo_disabled ?
+ cpu->pstate.max_freq : cpu->pstate.turbo_freq;
+
+ cpufreq_verify_within_cpu_limits(policy);
+
+- if (per_cpu_limits)
+- perf_limits = cpu->perf_limits;
+-
+- mutex_lock(&intel_pstate_limits_lock);
+-
+- intel_pstate_update_perf_limits(policy, perf_limits);
+-
+- mutex_unlock(&intel_pstate_limits_lock);
+-
+ return 0;
+ }
+
+-static unsigned int intel_cpufreq_turbo_update(struct cpudata *cpu,
+- struct cpufreq_policy *policy,
+- unsigned int target_freq)
+-{
+- unsigned int max_freq;
+-
+- update_turbo_state();
+-
+- max_freq = limits->no_turbo || limits->turbo_disabled ?
+- cpu->pstate.max_freq : cpu->pstate.turbo_freq;
+- policy->cpuinfo.max_freq = max_freq;
+- if (policy->max > max_freq)
+- policy->max = max_freq;
+-
+- if (target_freq > max_freq)
+- target_freq = max_freq;
+-
+- return target_freq;
+-}
+-
+ static int intel_cpufreq_target(struct cpufreq_policy *policy,
+ unsigned int target_freq,
+ unsigned int relation)
+@@ -2234,8 +2273,10 @@
+ struct cpufreq_freqs freqs;
+ int target_pstate;
+
++ update_turbo_state();
++
+ freqs.old = policy->cur;
+- freqs.new = intel_cpufreq_turbo_update(cpu, policy, target_freq);
++ freqs.new = target_freq;
+
+ cpufreq_freq_transition_begin(policy, &freqs);
+ switch (relation) {
+@@ -2255,6 +2296,7 @@
+ wrmsrl_on_cpu(policy->cpu, MSR_IA32_PERF_CTL,
+ pstate_funcs.get_val(cpu, target_pstate));
+ }
++ freqs.new = target_pstate * cpu->pstate.scaling;
+ cpufreq_freq_transition_end(policy, &freqs, false);
+
+ return 0;
+@@ -2266,10 +2308,12 @@
+ struct cpudata *cpu = all_cpu_data[policy->cpu];
+ int target_pstate;
+
+- target_freq = intel_cpufreq_turbo_update(cpu, policy, target_freq);
++ update_turbo_state();
++
+ target_pstate = DIV_ROUND_UP(target_freq, cpu->pstate.scaling);
++ target_pstate = intel_pstate_prepare_request(cpu, target_pstate);
+ intel_pstate_update_pstate(cpu, target_pstate);
+- return target_freq;
++ return target_pstate * cpu->pstate.scaling;
+ }
+
+ static int intel_cpufreq_cpu_init(struct cpufreq_policy *policy)
+@@ -2299,6 +2343,113 @@
+
+ static struct cpufreq_driver *intel_pstate_driver = &intel_pstate;
+
++static void intel_pstate_driver_cleanup(void)
++{
++ unsigned int cpu;
++
++ get_online_cpus();
++ for_each_online_cpu(cpu) {
++ if (all_cpu_data[cpu]) {
++ if (intel_pstate_driver == &intel_pstate)
++ intel_pstate_clear_update_util_hook(cpu);
++
++ kfree(all_cpu_data[cpu]);
++ all_cpu_data[cpu] = NULL;
++ }
++ }
++ put_online_cpus();
++}
++
++static int intel_pstate_register_driver(void)
++{
++ int ret;
++
++ intel_pstate_init_limits(&global);
++
++ ret = cpufreq_register_driver(intel_pstate_driver);
++ if (ret) {
++ intel_pstate_driver_cleanup();
++ return ret;
++ }
++
++ mutex_lock(&intel_pstate_limits_lock);
++ driver_registered = true;
++ mutex_unlock(&intel_pstate_limits_lock);
++
++ if (intel_pstate_driver == &intel_pstate && !hwp_active &&
++ pstate_funcs.get_target_pstate != get_target_pstate_use_cpu_load)
++ intel_pstate_debug_expose_params();
++
++ return 0;
++}
++
++static int intel_pstate_unregister_driver(void)
++{
++ if (hwp_active)
++ return -EBUSY;
++
++ if (intel_pstate_driver == &intel_pstate && !hwp_active &&
++ pstate_funcs.get_target_pstate != get_target_pstate_use_cpu_load)
++ intel_pstate_debug_hide_params();
++
++ mutex_lock(&intel_pstate_limits_lock);
++ driver_registered = false;
++ mutex_unlock(&intel_pstate_limits_lock);
++
++ cpufreq_unregister_driver(intel_pstate_driver);
++ intel_pstate_driver_cleanup();
++
++ return 0;
++}
++
++static ssize_t intel_pstate_show_status(char *buf)
++{
++ if (!driver_registered)
++ return sprintf(buf, "off\n");
++
++ return sprintf(buf, "%s\n", intel_pstate_driver == &intel_pstate ?
++ "active" : "passive");
++}
++
++static int intel_pstate_update_status(const char *buf, size_t size)
++{
++ int ret;
++
++ if (size == 3 && !strncmp(buf, "off", size))
++ return driver_registered ?
++ intel_pstate_unregister_driver() : -EINVAL;
++
++ if (size == 6 && !strncmp(buf, "active", size)) {
++ if (driver_registered) {
++ if (intel_pstate_driver == &intel_pstate)
++ return 0;
++
++ ret = intel_pstate_unregister_driver();
++ if (ret)
++ return ret;
++ }
++
++ intel_pstate_driver = &intel_pstate;
++ return intel_pstate_register_driver();
++ }
++
++ if (size == 7 && !strncmp(buf, "passive", size)) {
++ if (driver_registered) {
++ if (intel_pstate_driver != &intel_pstate)
++ return 0;
++
++ ret = intel_pstate_unregister_driver();
++ if (ret)
++ return ret;
++ }
++
++ intel_pstate_driver = &intel_cpufreq;
++ return intel_pstate_register_driver();
++ }
++
++ return -EINVAL;
++}
++
+ static int no_load __initdata;
+ static int no_hwp __initdata;
+ static int hwp_only __initdata;
+@@ -2486,9 +2637,9 @@
+
+ static int __init intel_pstate_init(void)
+ {
+- int cpu, rc = 0;
+ const struct x86_cpu_id *id;
+ struct cpu_defaults *cpu_def;
++ int rc = 0;
-- return get_avg_pstate(cpu) - pid_calc(&cpu->pid, cpu_load);
-+ sample->busy_scaled = busy_frac * 100;
-+ return get_avg_pstate(cpu) - pid_calc(&cpu->pid, sample->busy_scaled);
+ if (no_load)
+ return -ENODEV;
+@@ -2520,45 +2671,29 @@
+ if (intel_pstate_platform_pwr_mgmt_exists())
+ return -ENODEV;
+
++ if (!hwp_active && hwp_only)
++ return -ENOTSUPP;
++
+ pr_info("Intel P-state driver initializing\n");
+
+ all_cpu_data = vzalloc(sizeof(void *) * num_possible_cpus());
+ if (!all_cpu_data)
+ return -ENOMEM;
+
+- if (!hwp_active && hwp_only)
+- goto out;
+-
+ intel_pstate_request_control_from_smm();
+
+- rc = cpufreq_register_driver(intel_pstate_driver);
+- if (rc)
+- goto out;
+-
+- if (intel_pstate_driver == &intel_pstate && !hwp_active &&
+- pstate_funcs.get_target_pstate != get_target_pstate_use_cpu_load)
+- intel_pstate_debug_expose_params();
+-
+ intel_pstate_sysfs_expose_params();
+
++ mutex_lock(&intel_pstate_driver_lock);
++ rc = intel_pstate_register_driver();
++ mutex_unlock(&intel_pstate_driver_lock);
++ if (rc)
++ return rc;
++
+ if (hwp_active)
+ pr_info("HWP enabled\n");
+
+- return rc;
+-out:
+- get_online_cpus();
+- for_each_online_cpu(cpu) {
+- if (all_cpu_data[cpu]) {
+- if (intel_pstate_driver == &intel_pstate)
+- intel_pstate_clear_update_util_hook(cpu);
+-
+- kfree(all_cpu_data[cpu]);
+- }
+- }
+-
+- put_online_cpus();
+- vfree(all_cpu_data);
+- return -ENODEV;
++ return 0;
 }
+ device_initcall(intel_pstate_init);
- static inline int32_t get_target_pstate_use_performance(struct cpudata *cpu)