author    Tony Lambiris  2016-08-12 23:44:36 -0400
committer Tony Lambiris  2016-08-12 23:44:36 -0400
commit    5e1aae2c381876829eb1cc84de19ee56967175af (patch)
tree      fac1ae347c7eee8878a28ffffe39003cb0826ba9 /intel-pstate-backport.patch
parent    a5de2d64e1fe96b944bfd9f4c668b89f7aec5722 (diff)
download  aur-5e1aae2c381876829eb1cc84de19ee56967175af.tar.gz
Bump to linux 4.7
Diffstat (limited to 'intel-pstate-backport.patch')
-rw-r--r--  intel-pstate-backport.patch  959
1 file changed, 222 insertions, 737 deletions
diff --git a/intel-pstate-backport.patch b/intel-pstate-backport.patch
index 8b614640124..49e1ac003df 100644
--- a/intel-pstate-backport.patch
+++ b/intel-pstate-backport.patch
@@ -1,775 +1,260 @@
---- linux-4.6/drivers/cpufreq/intel_pstate.c.orig 2016-05-15 18:43:13.000000000 -0400
-+++ linux-4.6/drivers/cpufreq/intel_pstate.c 2016-06-24 17:36:23.064118833 -0400
-@@ -10,6 +10,8 @@
- * of the License.
- */
- 
-+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
-+
- #include <linux/kernel.h>
- #include <linux/kernel_stat.h>
- #include <linux/module.h>
-@@ -39,10 +41,17 @@
- #define ATOM_TURBO_RATIOS 0x66c
- #define ATOM_TURBO_VIDS 0x66d
- 
-+#ifdef CONFIG_ACPI
-+#include <acpi/processor.h>
-+#endif
-+
- #define FRAC_BITS 8
- #define int_tofp(X) ((int64_t)(X) << FRAC_BITS)
- #define fp_toint(X) ((X) >> FRAC_BITS)
- 
-+#define EXT_BITS 6
-+#define EXT_FRAC_BITS (EXT_BITS + FRAC_BITS)
-+
- static inline int32_t mul_fp(int32_t x, int32_t y)
- {
- return ((int64_t)x * (int64_t)y) >> FRAC_BITS;
-@@ -64,12 +73,22 @@
- return ret;
- }
- 
-+static inline u64 mul_ext_fp(u64 x, u64 y)
-+{
-+ return (x * y) >> EXT_FRAC_BITS;
-+}
-+
-+static inline u64 div_ext_fp(u64 x, u64 y)
-+{
-+ return div64_u64(x << EXT_FRAC_BITS, y);
-+}
-+
- /**
- * struct sample - Store performance sample
-- * @core_pct_busy: Ratio of APERF/MPERF in percent, which is actual
-+ * @core_avg_perf: Ratio of APERF/MPERF which is the actual average
- * performance during last sample period
- * @busy_scaled: Scaled busy value which is used to calculate next
-- * P state. This can be different than core_pct_busy
-+ * P state. This can be different than core_avg_perf
- * to account for cpu idle period
- * @aperf: Difference of actual performance frequency clock count
- * read from APERF MSR between last and current sample
-@@ -84,7 +103,7 @@
- * data for choosing next P State.
- */
- struct sample {
-- int32_t core_pct_busy;
-+ int32_t core_avg_perf;
- int32_t busy_scaled;
- u64 aperf;
- u64 mperf;
-@@ -162,6 +181,7 @@
- * struct cpudata - Per CPU instance data storage
- * @cpu: CPU number for this instance data
- * @update_util: CPUFreq utility callback information
-+ * @update_util_set: CPUFreq utility callback is set
- * @pstate: Stores P state limits for this CPU
- * @vid: Stores VID limits for this CPU
- * @pid: Stores PID parameters for this CPU
-@@ -172,6 +192,8 @@
- * @prev_cummulative_iowait: IO Wait time difference from last and
+--- linux-4.7/drivers/cpufreq/intel_pstate.c.dist 2016-07-24 15:23:50.000000000 -0400
++++ linux-4.7/drivers/cpufreq/intel_pstate.c 2016-08-12 20:51:49.493249861 -0400
+@@ -35,6 +35,7 @@
+ #include <asm/msr.h>
+ #include <asm/cpu_device_id.h>
+ #include <asm/cpufeature.h>
++#include <asm/intel-family.h>
+ 
+ #define ATOM_RATIOS 0x66a
+ #define ATOM_VIDS 0x66b
+@@ -96,7 +97,6 @@
+ * read from MPERF MSR between last and current sample
+ * @tsc: Difference of time stamp counter between last and
 * current sample
- * @sample: Storage for storing last Sample data
-+ * @acpi_perf_data: Stores ACPI perf information read from _PSS
-+ * @valid_pss_table: Set to true for valid ACPI _PSS entries found
+- * @freq: Effective frequency calculated from APERF/MPERF
+ * @time: Current time from scheduler
 *
+ * This structure is used in the cpudata structure to store performance sample
+@@ -108,7 +108,6 @@
+ u64 aperf;
+ u64 mperf;
+ u64 tsc;
+- int freq;
+ u64 time;
 };
- * This structure stores per CPU instance data for all CPUs.
- */
-@@ -179,6 +201,7 @@
- int cpu;
- 
- struct update_util_data update_util;
-+ bool update_util_set;
- 
- struct pstate_data pstate;
- struct vid_data vid;
-@@ -190,6 +213,10 @@
- u64 prev_tsc;
- u64 prev_cummulative_iowait;
- struct sample sample;
-+#ifdef CONFIG_ACPI
-+ struct acpi_processor_performance acpi_perf_data;
-+ bool valid_pss_table;
-+#endif
 };
- 
- static struct cpudata **all_cpu_data;
-@@ -258,6 +285,9 @@
- static struct pstate_funcs pstate_funcs;
- static int hwp_active;
- 
-+#ifdef CONFIG_ACPI
-+static bool acpi_ppc;
-+#endif
- 
- /**
- * struct perf_limits - Store user and policy limits
-@@ -331,6 +361,124 @@
- static struct perf_limits *limits = &powersave_limits;
- #endif
- 
-+#ifdef CONFIG_ACPI
-+
-+static bool intel_pstate_get_ppc_enable_status(void)
-+{
-+ if (acpi_gbl_FADT.preferred_profile == PM_ENTERPRISE_SERVER ||
-+ acpi_gbl_FADT.preferred_profile == PM_PERFORMANCE_SERVER)
-+ return true;
-+
-+ return acpi_ppc;
-+}
-+
-+/*
-+ * The max target pstate ratio is a 8 bit value in both PLATFORM_INFO MSR and
-+ * in TURBO_RATIO_LIMIT MSR, which pstate driver stores in max_pstate and
-+ * max_turbo_pstate fields. The PERF_CTL MSR contains 16 bit value for P state
-+ * ratio, out of it only high 8 bits are used. For example 0x1700 is setting
-+ * target ratio 0x17. The _PSS control value stores in a format which can be
-+ * directly written to PERF_CTL MSR. But in intel_pstate driver this shift
-+ * occurs during write to PERF_CTL (E.g. for cores core_set_pstate()).
-+ * This function converts the _PSS control value to intel pstate driver format
-+ * for comparison and assignment.
-+ */
-+static int convert_to_native_pstate_format(struct cpudata *cpu, int index)
-+{
-+ return cpu->acpi_perf_data.states[index].control >> 8;
-+}
-+
-+static void intel_pstate_init_acpi_perf_limits(struct cpufreq_policy *policy)
-+{
-+ struct cpudata *cpu;
-+ int turbo_pss_ctl;
-+ int ret;
-+ int i;
-+
-+ if (hwp_active)
-+ return;
-+
-+ if (!intel_pstate_get_ppc_enable_status())
-+ return;
-+
-+ cpu = all_cpu_data[policy->cpu];
-+
-+ ret = acpi_processor_register_performance(&cpu->acpi_perf_data,
-+ policy->cpu);
-+ if (ret)
-+ return;
-+
-+ /*
-+ * Check if the control value in _PSS is for PERF_CTL MSR, which should
-+ * guarantee that the states returned by it map to the states in our
-+ * list directly.
-+ */
-+ if (cpu->acpi_perf_data.control_register.space_id !=
-+ ACPI_ADR_SPACE_FIXED_HARDWARE)
-+ goto err;
-+
-+ /*
-+ * If there is only one entry _PSS, simply ignore _PSS and continue as
-+ * usual without taking _PSS into account
-+ */
-+ if (cpu->acpi_perf_data.state_count < 2)
-+ goto err;
-+
-+ pr_debug("CPU%u - ACPI _PSS perf data\n", policy->cpu);
-+ for (i = 0; i < cpu->acpi_perf_data.state_count; i++) {
-+ pr_debug(" %cP%d: %u MHz, %u mW, 0x%x\n",
-+ (i == cpu->acpi_perf_data.state ? '*' : ' '), i,
-+ (u32) cpu->acpi_perf_data.states[i].core_frequency,
-+ (u32) cpu->acpi_perf_data.states[i].power,
-+ (u32) cpu->acpi_perf_data.states[i].control);
-+ }
-+
-+ /*
-+ * The _PSS table doesn't contain whole turbo frequency range.
-+ * This just contains +1 MHZ above the max non turbo frequency,
-+ * with control value corresponding to max turbo ratio. But
-+ * when cpufreq set policy is called, it will call with this
-+ * max frequency, which will cause a reduced performance as
-+ * this driver uses real max turbo frequency as the max
-+ * frequency. So correct this frequency in _PSS table to
-+ * correct max turbo frequency based on the turbo ratio.
-+ * Also need to convert to MHz as _PSS freq is in MHz.
-+ */
-+ turbo_pss_ctl = convert_to_native_pstate_format(cpu, 0);
-+ if (turbo_pss_ctl > cpu->pstate.max_pstate)
-+ cpu->acpi_perf_data.states[0].core_frequency =
-+ policy->cpuinfo.max_freq / 1000;
-+ cpu->valid_pss_table = true;
-+ pr_info("_PPC limits will be enforced\n");
-+
-+ return;
-+
-+ err:
-+ cpu->valid_pss_table = false;
-+ acpi_processor_unregister_performance(policy->cpu);
-+}
-+
-+static void intel_pstate_exit_perf_limits(struct cpufreq_policy *policy)
-+{
-+ struct cpudata *cpu;
-+
-+ cpu = all_cpu_data[policy->cpu];
-+ if (!cpu->valid_pss_table)
-+ return;
-+
-+ acpi_processor_unregister_performance(policy->cpu);
-+}
-+
-+#else
-+static void intel_pstate_init_acpi_perf_limits(struct cpufreq_policy *policy)
-+{
-+}
-+
-+static void intel_pstate_exit_perf_limits(struct cpufreq_policy *policy)
-+{
-+}
-+#endif
-+
- static inline void pid_reset(struct _pid *pid, int setpoint, int busy,
- int deadband, int integral) {
- pid->setpoint = int_tofp(setpoint);
-@@ -341,17 +489,17 @@
- 
- static inline void pid_p_gain_set(struct _pid *pid, int percent)
- {
-- pid->p_gain = div_fp(int_tofp(percent), int_tofp(100));
-+ pid->p_gain = div_fp(percent, 100);
- }
- 
- static inline void pid_i_gain_set(struct _pid *pid, int percent)
- {
-- pid->i_gain = div_fp(int_tofp(percent), int_tofp(100));
-+ pid->i_gain = div_fp(percent, 100);
- }
- 
- static inline void pid_d_gain_set(struct _pid *pid, int percent)
- {
-- pid->d_gain = div_fp(int_tofp(percent), int_tofp(100));
-+ pid->d_gain = div_fp(percent, 100);
- }
- 
- static signed int pid_calc(struct _pid *pid, int32_t busy)
+@@ -281,9 +280,9 @@
+ static inline int32_t get_target_pstate_use_performance(struct cpudata *cpu);
+ static inline int32_t get_target_pstate_use_cpu_load(struct cpudata *cpu);
+ 
+-static struct pstate_adjust_policy pid_params;
+-static struct pstate_funcs pstate_funcs;
+-static int hwp_active;
++static struct pstate_adjust_policy pid_params __read_mostly;
++static struct pstate_funcs pstate_funcs __read_mostly;
++static int hwp_active __read_mostly;
+ 
+ #ifdef CONFIG_ACPI
+ static bool acpi_ppc;
+@@ -807,7 +806,8 @@
+ static void intel_pstate_hwp_enable(struct cpudata *cpudata)
+ {
+ /* First disable HWP notification interrupt as we don't process them */
+- wrmsrl_on_cpu(cpudata->cpu, MSR_HWP_INTERRUPT, 0x00);
++ if (static_cpu_has(X86_FEATURE_HWP_NOTIFY))
++ wrmsrl_on_cpu(cpudata->cpu, MSR_HWP_INTERRUPT, 0x00);
+ 
+ wrmsrl_on_cpu(cpudata->cpu, MSR_PM_ENABLE, 0x1);
+ }
+@@ -944,7 +944,7 @@
+ if (err)
+ goto skip_tar;
+ 
+- tdp_msr = MSR_CONFIG_TDP_NOMINAL + tdp_ctrl;
++ tdp_msr = MSR_CONFIG_TDP_NOMINAL + (tdp_ctrl & 0x3);
+ err = rdmsrl_safe(tdp_msr, &tdp_ratio);
+ if (err)
+ goto skip_tar;
+@@ -972,7 +972,7 @@
+ u64 value;
+ int nont, ret;
+ 
+- rdmsrl(MSR_NHM_TURBO_RATIO_LIMIT, value);
++ rdmsrl(MSR_TURBO_RATIO_LIMIT, value);
+ nont = core_get_max_pstate();
+ ret = (value) & 255;
+ if (ret <= nont)
+@@ -1001,7 +1001,7 @@
+ u64 value;
+ int nont, ret;
+ 
+- rdmsrl(MSR_NHM_TURBO_RATIO_LIMIT, value);
++ rdmsrl(MSR_TURBO_RATIO_LIMIT, value);
+ nont = core_get_max_pstate();
+ ret = (((value) >> 8) & 0xFF);
+ if (ret <= nont)
+@@ -1091,6 +1091,26 @@
+ },
+ };
+ 
++static struct cpu_defaults bxt_params = {
++ .pid_policy = {
++ .sample_rate_ms = 10,
++ .deadband = 0,
++ .setpoint = 60,
++ .p_gain_pct = 14,
++ .d_gain_pct = 0,
++ .i_gain_pct = 4,
++ },
++ .funcs = {
++ .get_max = core_get_max_pstate,
++ .get_max_physical = core_get_max_pstate_physical,
++ .get_min = core_get_min_pstate,
++ .get_turbo = core_get_turbo_pstate,
++ .get_scaling = core_get_scaling,
++ .get_val = core_get_val,
++ .get_target_pstate = get_target_pstate_use_cpu_load,
++ },
++};
++
+ static void intel_pstate_get_min_max(struct cpudata *cpu, int *min, int *max)
+ {
+ int max_perf = cpu->pstate.turbo_pstate;
+@@ -1113,17 +1133,12 @@
+ *min = clamp_t(int, min_perf, cpu->pstate.min_pstate, max_perf);
+ }
+ 
+-static inline void intel_pstate_record_pstate(struct cpudata *cpu, int pstate)
+-{
+- trace_cpu_frequency(pstate * cpu->pstate.scaling, cpu->cpu);
+- cpu->pstate.current_pstate = pstate;
+-}
+-
+ static void intel_pstate_set_min_pstate(struct cpudata *cpu)
+ {
+ int pstate = cpu->pstate.min_pstate;
+ 
+- intel_pstate_record_pstate(cpu, pstate);
++ trace_cpu_frequency(pstate * cpu->pstate.scaling, cpu->cpu);
++ cpu->pstate.current_pstate = pstate;
+ 
+ /*
+ * Generally, there is no guarantee that this code will always run on
+ * the CPU being updated, so force the register update to run on the
+@@ -1283,10 +1298,11 @@
+ 
+ intel_pstate_get_min_max(cpu, &min_perf, &max_perf);
+ pstate = clamp_t(int, pstate, min_perf, max_perf);
++ trace_cpu_frequency(pstate * cpu->pstate.scaling, cpu->cpu);
+ if (pstate == cpu->pstate.current_pstate)
+ return;
+ 
+- intel_pstate_record_pstate(cpu, pstate);
++ cpu->pstate.current_pstate = pstate;
+ wrmsrl(MSR_IA32_PERF_CTL, pstate_funcs.get_val(cpu, pstate));
+ }
-@@ -537,7 +685,7 @@
- 
- total = cpu->pstate.turbo_pstate - cpu->pstate.min_pstate + 1;
- no_turbo = cpu->pstate.max_pstate - cpu->pstate.min_pstate + 1;
-- turbo_fp = div_fp(int_tofp(no_turbo), int_tofp(total));
-+ turbo_fp = div_fp(no_turbo, total);
- turbo_pct = 100 - fp_toint(mul_fp(turbo_fp, int_tofp(100)));
- return sprintf(buf, "%u\n", turbo_pct);
- }
-@@ -579,7 +727,7 @@
- 
- update_turbo_state();
- if (limits->turbo_disabled) {
-- pr_warn("intel_pstate: Turbo disabled by BIOS or unavailable on processor\n");
-+ pr_warn("Turbo disabled by BIOS or unavailable on processor\n");
- return -EPERM;
- }
- 
-@@ -608,8 +756,7 @@
- limits->max_perf_pct);
- limits->max_perf_pct = max(limits->min_perf_pct,
- limits->max_perf_pct);
-- limits->max_perf = div_fp(int_tofp(limits->max_perf_pct),
-- int_tofp(100));
-+ limits->max_perf = div_fp(limits->max_perf_pct, 100);
- 
- if (hwp_active)
- intel_pstate_hwp_set_online_cpus();
-@@ -633,8 +780,7 @@
- limits->min_perf_pct);
- limits->min_perf_pct = min(limits->max_perf_pct,
- limits->min_perf_pct);
-- limits->min_perf = div_fp(int_tofp(limits->min_perf_pct),
-- int_tofp(100));
-+ limits->min_perf = div_fp(limits->min_perf_pct, 100);
- 
- if (hwp_active)
- intel_pstate_hwp_set_online_cpus();
-@@ -1019,15 +1165,11 @@
- intel_pstate_set_min_pstate(cpu);
- }
- 
--static inline void intel_pstate_calc_busy(struct cpudata *cpu)
-+static inline void intel_pstate_calc_avg_perf(struct cpudata *cpu)
- {
- struct sample *sample = &cpu->sample;
-- int64_t core_pct;
--
-- core_pct = int_tofp(sample->aperf) * int_tofp(100);
-- core_pct = div64_u64(core_pct, int_tofp(sample->mperf));
--
-- sample->core_pct_busy = (int32_t)core_pct;
-+ sample->core_avg_perf = div_ext_fp(sample->aperf, sample->mperf);
- }
- 
- static inline bool intel_pstate_sample(struct cpudata *cpu, u64 time)
-@@ -1070,9 +1212,14 @@
- 
- static inline int32_t get_avg_frequency(struct cpudata *cpu)
- {
-- return fp_toint(mul_fp(cpu->sample.core_pct_busy,
-- int_tofp(cpu->pstate.max_pstate_physical *
-- cpu->pstate.scaling / 100)));
-+ return mul_ext_fp(cpu->sample.core_avg_perf,
-+ cpu->pstate.max_pstate_physical * cpu->pstate.scaling);
-+}
-+
-+static inline int32_t get_avg_pstate(struct cpudata *cpu)
-+{
-+ return mul_ext_fp(cpu->pstate.max_pstate_physical,
-+ cpu->sample.core_avg_perf);
- }
- 
- static inline int32_t get_target_pstate_use_cpu_load(struct cpudata *cpu)
-@@ -1107,49 +1254,43 @@
- cpu_load = div64_u64(int_tofp(100) * mperf, sample->tsc);
- cpu->sample.busy_scaled = cpu_load;
- 
-- return cpu->pstate.current_pstate - pid_calc(&cpu->pid, cpu_load);
-+ return get_avg_pstate(cpu) - pid_calc(&cpu->pid, cpu_load);
- }
- 
- static inline int32_t get_target_pstate_use_performance(struct cpudata *cpu)
- {
-- int32_t core_busy, max_pstate, current_pstate, sample_ratio;
-+ int32_t perf_scaled, max_pstate, current_pstate, sample_ratio;
- u64 duration_ns;
- 
- /*
-- * core_busy is the ratio of actual performance to max
-- * max_pstate is the max non turbo pstate available
-- * current_pstate was the pstate that was requested during
-- * the last sample period.
-- *
-- * We normalize core_busy, which was our actual percent
-- * performance to what we requested during the last sample
-- * period. The result will be a percentage of busy at a
-- * specified pstate.
-+ * perf_scaled is the average performance during the last sampling
-+ * period scaled by the ratio of the maximum P-state to the P-state
-+ * requested last time (in percent). That measures the system's
-+ * response to the previous P-state selection.
- */
-- core_busy = cpu->sample.core_pct_busy;
-- max_pstate = int_tofp(cpu->pstate.max_pstate_physical);
-- current_pstate = int_tofp(cpu->pstate.current_pstate);
-- core_busy = mul_fp(core_busy, div_fp(max_pstate, current_pstate));
-+ max_pstate = cpu->pstate.max_pstate_physical;
-+ current_pstate = cpu->pstate.current_pstate;
-+ perf_scaled = mul_ext_fp(cpu->sample.core_avg_perf,
-+ div_fp(100 * max_pstate, current_pstate));
- 
- /*
- * Since our utilization update callback will not run unless we are
- * in C0, check if the actual elapsed time is significantly greater (3x)
- * than our sample interval. If it is, then we were idle for a long
-- * enough period of time to adjust our busyness.
-+ * enough period of time to adjust our performance metric.
- */
- duration_ns = cpu->sample.time - cpu->last_sample_time;
- if ((s64)duration_ns > pid_params.sample_rate_ns * 3) {
-- sample_ratio = div_fp(int_tofp(pid_params.sample_rate_ns),
-- int_tofp(duration_ns));
-- core_busy = mul_fp(core_busy, sample_ratio);
-+ sample_ratio = div_fp(pid_params.sample_rate_ns, duration_ns);
-+ perf_scaled = mul_fp(perf_scaled, sample_ratio);
- } else {
- sample_ratio = div_fp(100 * cpu->sample.mperf, cpu->sample.tsc);
- if (sample_ratio < int_tofp(1))
-- core_busy = 0;
-+ perf_scaled = 0;
- }
- 
-- cpu->sample.busy_scaled = core_busy;
-- return cpu->pstate.current_pstate - pid_calc(&cpu->pid, core_busy);
-+ cpu->sample.busy_scaled = perf_scaled;
-+ return cpu->pstate.current_pstate - pid_calc(&cpu->pid, perf_scaled);
- }
- 
- static inline void intel_pstate_update_pstate(struct cpudata *cpu, int pstate)
-@@ -1179,7 +1320,7 @@
- intel_pstate_update_pstate(cpu, target_pstate);
- 
- sample = &cpu->sample;
-- trace_pstate_sample(fp_toint(sample->core_pct_busy),
-+ trace_pstate_sample(mul_ext_fp(100, sample->core_avg_perf),
- fp_toint(sample->busy_scaled),
- from,
- cpu->pstate.current_pstate,
-@@ -1199,7 +1340,7 @@
- bool sample_taken = intel_pstate_sample(cpu, time);
- 
- if (sample_taken) {
-- intel_pstate_calc_busy(cpu);
-+ intel_pstate_calc_avg_perf(cpu);
- if (!hwp_active)
- intel_pstate_adjust_busy_pstate(cpu);
- }
+@@ -1334,29 +1350,32 @@
+ (unsigned long)&policy }
+ 
+ static const struct x86_cpu_id intel_pstate_cpu_ids[] = {
+- ICPU(0x2a, core_params),
+- ICPU(0x2d, core_params),
+- ICPU(0x37, silvermont_params),
+- ICPU(0x3a, core_params),
+- ICPU(0x3c, core_params),
+- ICPU(0x3d, core_params),
+- ICPU(0x3e, core_params),
+- ICPU(0x3f, core_params),
+- ICPU(0x45, core_params),
+- ICPU(0x46, core_params),
+- ICPU(0x47, core_params),
+- ICPU(0x4c, airmont_params),
+- ICPU(0x4e, core_params),
+- ICPU(0x4f, core_params),
+- ICPU(0x5e, core_params),
+- ICPU(0x56, core_params),
+- ICPU(0x57, knl_params),
++ ICPU(INTEL_FAM6_SANDYBRIDGE, core_params),
++ ICPU(INTEL_FAM6_SANDYBRIDGE_X, core_params),
++ ICPU(INTEL_FAM6_ATOM_SILVERMONT1, silvermont_params),
++ ICPU(INTEL_FAM6_IVYBRIDGE, core_params),
++ ICPU(INTEL_FAM6_HASWELL_CORE, core_params),
++ ICPU(INTEL_FAM6_BROADWELL_CORE, core_params),
++ ICPU(INTEL_FAM6_IVYBRIDGE_X, core_params),
++ ICPU(INTEL_FAM6_HASWELL_X, core_params),
++ ICPU(INTEL_FAM6_HASWELL_ULT, core_params),
++ ICPU(INTEL_FAM6_HASWELL_GT3E, core_params),
++ ICPU(INTEL_FAM6_BROADWELL_GT3E, core_params),
++ ICPU(INTEL_FAM6_ATOM_AIRMONT, airmont_params),
++ ICPU(INTEL_FAM6_SKYLAKE_MOBILE, core_params),
++ ICPU(INTEL_FAM6_BROADWELL_X, core_params),
++ ICPU(INTEL_FAM6_SKYLAKE_DESKTOP, core_params),
++ ICPU(INTEL_FAM6_BROADWELL_XEON_D, core_params),
++ ICPU(INTEL_FAM6_XEON_PHI_KNL, knl_params),
++ ICPU(INTEL_FAM6_ATOM_GOLDMONT, bxt_params),
+ {}
+ };
+ MODULE_DEVICE_TABLE(x86cpu, intel_pstate_cpu_ids);
+ 
+-static const struct x86_cpu_id intel_pstate_cpu_oob_ids[] = {
+- ICPU(0x56, core_params),
++static const struct x86_cpu_id intel_pstate_cpu_oob_ids[] __initconst = {
++ ICPU(INTEL_FAM6_BROADWELL_XEON_D, core_params),
++ ICPU(INTEL_FAM6_BROADWELL_X, core_params),
++ ICPU(INTEL_FAM6_SKYLAKE_X, core_params),
+ {}
+ };
+@@ -1575,12 +1594,12 @@
+ .name = "intel_pstate",
+ };
+ 
+-static int __initdata no_load;
+-static int __initdata no_hwp;
+-static int __initdata hwp_only;
+-static unsigned int force_load;
++static int no_load __initdata;
++static int no_hwp __initdata;
++static int hwp_only __initdata;
++static unsigned int force_load __initdata;
+ 
+-static int intel_pstate_msrs_not_valid(void)
++static int __init intel_pstate_msrs_not_valid(void)
+ {
+ if (!pstate_funcs.get_max() ||
+ !pstate_funcs.get_min() ||
+@@ -1590,7 +1609,7 @@
+ return 0;
+ }
+ 
+-static void copy_pid_params(struct pstate_adjust_policy *policy)
++static void __init copy_pid_params(struct pstate_adjust_policy *policy)
+ {
+ pid_params.sample_rate_ms = policy->sample_rate_ms;
+ pid_params.sample_rate_ns = pid_params.sample_rate_ms * NSEC_PER_MSEC;
+@@ -1601,7 +1620,7 @@
+ pid_params.setpoint = policy->setpoint;
+ }
+ 
+-static void copy_cpu_funcs(struct pstate_funcs *funcs)
++static void __init copy_cpu_funcs(struct pstate_funcs *funcs)
+ {
+ pstate_funcs.get_max = funcs->get_max;
+ pstate_funcs.get_max_physical = funcs->get_max_physical;
+@@ -1616,7 +1635,7 @@
+ 
+ #ifdef CONFIG_ACPI
+ 
+-static bool intel_pstate_no_acpi_pss(void)
++static bool __init intel_pstate_no_acpi_pss(void)
+ {
+ int i;
+@@ -1645,7 +1664,7 @@
+ return true;
+ }
+ 
+-static bool intel_pstate_has_acpi_ppc(void)
++static bool __init intel_pstate_has_acpi_ppc(void)
+ {
+ int i;
+@@ -1673,7 +1692,7 @@
+ };
+ 
+ /* Hardware vendor-specific info that has its own power management modes */
+-static struct hw_vendor_info vendor_info[] = {
++static struct hw_vendor_info vendor_info[] __initdata = {
+ {1, "HP ", "ProLiant", PSS},
+ {1, "ORACLE", "X4-2 ", PPC},
+ {1, "ORACLE", "X4-2L ", PPC},
+@@ -1692,7 +1711,7 @@
+ {0, "", ""},
+ };
+ 
+-static bool intel_pstate_platform_pwr_mgmt_exists(void)
++static bool __init intel_pstate_platform_pwr_mgmt_exists(void)
+ {
+ struct acpi_table_header hdr;
+ struct hw_vendor_info *v_info;
-@@ -1261,23 +1402,16 @@
- 
- intel_pstate_busy_pid_reset(cpu);
- 
-- cpu->update_util.func = intel_pstate_update_util;
--
-- pr_debug("intel_pstate: controlling: cpu %d\n", cpunum);
-+ pr_debug("controlling: cpu %d\n", cpunum);
- 
- return 0;
- }
- 
- static unsigned int intel_pstate_get(unsigned int cpu_num)
- {
-- struct sample *sample;
-- struct cpudata *cpu;
-+ struct cpudata *cpu = all_cpu_data[cpu_num];
- 
-- cpu = all_cpu_data[cpu_num];
-- if (!cpu)
-- return 0;
-- sample = &cpu->sample;
-- return get_avg_frequency(cpu);
-+ return cpu ? get_avg_frequency(cpu) : 0;
- }
- 
- static void intel_pstate_set_update_util_hook(unsigned int cpu_num)
-@@ -1286,12 +1420,20 @@
- 
- /* Prevent intel_pstate_update_util() from using stale data. */
- cpu->sample.time = 0;
-- cpufreq_set_update_util_data(cpu_num, &cpu->update_util);
-+ cpufreq_add_update_util_hook(cpu_num, &cpu->update_util,
-+ intel_pstate_update_util);
-+ cpu->update_util_set = true;
- }
- 
- static void intel_pstate_clear_update_util_hook(unsigned int cpu)
- {
-- cpufreq_set_update_util_data(cpu, NULL);
-+ struct cpudata *cpu_data = all_cpu_data[cpu];
-+
-+ if (!cpu_data->update_util_set)
-+ return;
-+
-+ cpufreq_remove_update_util_hook(cpu);
-+ cpu_data->update_util_set = false;
- synchronize_sched();
- }
-@@ -1311,20 +1453,31 @@
- 
- static int intel_pstate_set_policy(struct cpufreq_policy *policy)
- {
-+ struct cpudata *cpu;
-+
- if (!policy->cpuinfo.max_freq)
- return -ENODEV;
- 
- intel_pstate_clear_update_util_hook(policy->cpu);
- 
-+ cpu = all_cpu_data[0];
-+ if (cpu->pstate.max_pstate_physical > cpu->pstate.max_pstate) {
-+ if (policy->max < policy->cpuinfo.max_freq &&
-+ policy->max > cpu->pstate.max_pstate * cpu->pstate.scaling) {
-+ pr_debug("policy->max > max non turbo frequency\n");
-+ policy->max = policy->cpuinfo.max_freq;
-+ }
-+ }
-+
- if (policy->policy == CPUFREQ_POLICY_PERFORMANCE) {
- limits = &performance_limits;
- if (policy->max >= policy->cpuinfo.max_freq) {
-- pr_debug("intel_pstate: set performance\n");
-+ pr_debug("set performance\n");
- intel_pstate_set_performance_limits(limits);
- goto out;
- }
- } else {
-- pr_debug("intel_pstate: set powersave\n");
-+ pr_debug("set powersave\n");
- limits = &powersave_limits;
- }
- 
-@@ -1348,10 +1501,8 @@
- /* Make sure min_perf_pct <= max_perf_pct */
- limits->min_perf_pct = min(limits->max_perf_pct, limits->min_perf_pct);
- 
-- limits->min_perf = div_fp(int_tofp(limits->min_perf_pct),
-- int_tofp(100));
-- limits->max_perf = div_fp(int_tofp(limits->max_perf_pct),
-- int_tofp(100));
-+ limits->min_perf = div_fp(limits->min_perf_pct, 100);
-+ limits->max_perf = div_fp(limits->max_perf_pct, 100);
- 
- out:
- intel_pstate_set_update_util_hook(policy->cpu);
-@@ -1377,7 +1528,7 @@
- int cpu_num = policy->cpu;
- struct cpudata *cpu = all_cpu_data[cpu_num];
- 
-- pr_debug("intel_pstate: CPU %d exiting\n", cpu_num);
-+ pr_debug("CPU %d exiting\n", cpu_num);
- 
- intel_pstate_clear_update_util_hook(cpu_num);
- 
-@@ -1410,12 +1561,20 @@
- policy->cpuinfo.min_freq = cpu->pstate.min_pstate * cpu->pstate.scaling;
- policy->cpuinfo.max_freq =
- cpu->pstate.turbo_pstate * cpu->pstate.scaling;
-+ intel_pstate_init_acpi_perf_limits(policy);
- policy->cpuinfo.transition_latency = CPUFREQ_ETERNAL;
- cpumask_set_cpu(policy->cpu, policy->cpus);
- 
- return 0;
- }
- 
-+static int intel_pstate_cpu_exit(struct cpufreq_policy *policy)
-+{
-+ intel_pstate_exit_perf_limits(policy);
-+
-+ return 0;
-+}
-+
- static struct cpufreq_driver intel_pstate_driver = {
- .flags = CPUFREQ_CONST_LOOPS,
- .verify = intel_pstate_verify_policy,
-@@ -1423,6 +1582,7 @@
- .resume = intel_pstate_hwp_set_policy,
- .get = intel_pstate_get,
- .init = intel_pstate_cpu_init,
-+ .exit = intel_pstate_cpu_exit,
- .stop_cpu = intel_pstate_stop_cpu,
- .name = "intel_pstate",
- };
-@@ -1466,8 +1626,7 @@
- 
- }
- 
---#if IS_ENABLED(CONFIG_ACPI)
---#include <acpi/processor.h>
-+#ifdef CONFIG_ACPI
- 
- static bool intel_pstate_no_acpi_pss(void)
- {
-@@ -1623,7 +1782,7 @@
- if (intel_pstate_platform_pwr_mgmt_exists())
- return -ENODEV;
- 
-- pr_info("Intel P-state driver initializing.\n");
-+ pr_info("Intel P-state driver initializing\n");
- 
- all_cpu_data = vzalloc(sizeof(void *) * num_possible_cpus());
- if (!all_cpu_data)
-@@ -1640,7 +1799,7 @@
- intel_pstate_sysfs_expose_params();
- 
- if (hwp_active)
-- pr_info("intel_pstate: HWP enabled\n");
-+ pr_info("HWP enabled\n");
- 
- return rc;
- out:
-@@ -1666,13 +1825,19 @@
- if (!strcmp(str, "disable"))
- no_load = 1;
- if (!strcmp(str, "no_hwp")) {
-- pr_info("intel_pstate: HWP disabled\n");
-+ pr_info("HWP disabled\n");
- no_hwp = 1;
- }
- if (!strcmp(str, "force"))
- force_load = 1;
- if (!strcmp(str, "hwp_only"))
- hwp_only = 1;
-+
-+#ifdef CONFIG_ACPI
-+ if (!strcmp(str, "support_acpi_ppc"))
-+ acpi_ppc = true;
-+#endif
-+
- return 0;
- }
- early_param("intel_pstate", intel_pstate_setup);
---- linux-4.6/kernel/sched/cpufreq.c.orig 2016-06-24 15:32:20.064495916 -0400
-+++ linux-4.6/kernel/sched/cpufreq.c 2016-06-24 15:33:47.717298423 -0400
-@@ -35,3 +35,52 @@
- rcu_assign_pointer(per_cpu(cpufreq_update_util_data, cpu), data);
- }
- EXPORT_SYMBOL_GPL(cpufreq_set_update_util_data);
-+
-+/**
-+ * cpufreq_add_update_util_hook - Populate the CPU's update_util_data pointer.
-+ * @cpu: The CPU to set the pointer for.
-+ * @data: New pointer value.
-+ * @func: Callback function to set for the CPU.
-+ *
-+ * Set and publish the update_util_data pointer for the given CPU.
-+ *
-+ * The update_util_data pointer of @cpu is set to @data and the callback
-+ * function pointer in the target struct update_util_data is set to @func.
-+ * That function will be called by cpufreq_update_util() from RCU-sched
-+ * read-side critical sections, so it must not sleep. @data will always be
-+ * passed to it as the first argument which allows the function to get to the
-+ * target update_util_data structure and its container.
-+ *
-+ * The update_util_data pointer of @cpu must be NULL when this function is
-+ * called or it will WARN() and return with no effect.
-+ */
-+void cpufreq_add_update_util_hook(int cpu, struct update_util_data *data,
-+ void (*func)(struct update_util_data *data, u64 time,
-+ unsigned long util, unsigned long max))
-+{
-+ if (WARN_ON(!data || !func))
-+ return;
-+
-+ if (WARN_ON(per_cpu(cpufreq_update_util_data, cpu)))
-+ return;
-+
-+ data->func = func;
-+ rcu_assign_pointer(per_cpu(cpufreq_update_util_data, cpu), data);
-+}
-+EXPORT_SYMBOL_GPL(cpufreq_add_update_util_hook);
-+
-+/**
-+ * cpufreq_remove_update_util_hook - Clear the CPU's update_util_data pointer.
-+ * @cpu: The CPU to clear the pointer for.
-+ *
-+ * Clear the update_util_data pointer for the given CPU.
-+ *
-+ * Callers must use RCU-sched callbacks to free any memory that might be
-+ * accessed via the old update_util_data pointer or invoke synchronize_sched()
-+ * right after this function to avoid use-after-free.
-+ */
-+void cpufreq_remove_update_util_hook(int cpu)
-+{
-+ rcu_assign_pointer(per_cpu(cpufreq_update_util_data, cpu), NULL);
-+}
-+EXPORT_SYMBOL_GPL(cpufreq_remove_update_util_hook);
---- linux-4.6/include/linux/sched.h.dist 2016-06-24 19:19:15.391657951 -0400
-+++ linux-4.6/include/linux/sched.h 2016-06-24 19:21:46.863939933 -0400
-@@ -3241,6 +3241,10 @@
- };
- 
- void cpufreq_set_update_util_data(int cpu, struct update_util_data *data);
-+void cpufreq_add_update_util_hook(int cpu, struct update_util_data *data,
-+ void (*func)(struct update_util_data *data, u64 time,
-+ unsigned long util, unsigned long max));
-+void cpufreq_remove_update_util_hook(int cpu);
- #endif /* CONFIG_CPU_FREQ */
- 
- #endif
---- linux-4.6/drivers/cpufreq/intel_pstate.c.orig 2016-07-03 10:37:53.324091642 -0400
-+++ linux-4.6/drivers/cpufreq/intel_pstate.c 2016-07-03 10:38:50.450757945 -0400
-@@ -372,26 +372,9 @@
- return acpi_ppc;
- }
- 
--/*
-- * The max target pstate ratio is a 8 bit value in both PLATFORM_INFO MSR and
-- * in TURBO_RATIO_LIMIT MSR, which pstate driver stores in max_pstate and
-- * max_turbo_pstate fields. The PERF_CTL MSR contains 16 bit value for P state
-- * ratio, out of it only high 8 bits are used. For example 0x1700 is setting
-- * target ratio 0x17. The _PSS control value stores in a format which can be
-- * directly written to PERF_CTL MSR. But in intel_pstate driver this shift
-- * occurs during write to PERF_CTL (E.g. for cores core_set_pstate()).
-- * This function converts the _PSS control value to intel pstate driver format
-- * for comparison and assignment.
-- */
--static int convert_to_native_pstate_format(struct cpudata *cpu, int index)
--{
-- return cpu->acpi_perf_data.states[index].control >> 8;
--}
--
- static void intel_pstate_init_acpi_perf_limits(struct cpufreq_policy *policy)
- {
- struct cpudata *cpu;
-- int turbo_pss_ctl;
- int ret;
- int i;
-@@ -441,15 +424,14 @@
- * max frequency, which will cause a reduced performance as
- * this driver uses real max turbo frequency as the max
- * frequency. So correct this frequency in _PSS table to
-- * correct max turbo frequency based on the turbo ratio.
-+ * correct max turbo frequency based on the turbo state.
- * Also need to convert to MHz as _PSS freq is in MHz.
- */
-- turbo_pss_ctl = convert_to_native_pstate_format(cpu, 0);
-- if (turbo_pss_ctl > cpu->pstate.max_pstate)
-+ if (!limits->turbo_disabled)
- cpu->acpi_perf_data.states[0].core_frequency =
- policy->cpuinfo.max_freq / 1000;
- cpu->valid_pss_table = true;
-- pr_info("_PPC limits will be enforced\n");
-+ pr_debug("_PPC limits will be enforced\n");
- 
- return;
- 
-@@ -1418,6 +1400,9 @@
- {
- struct cpudata *cpu = all_cpu_data[cpu_num];
- 
-+ if (cpu->update_util_set)
-+ return;
-+
- /* Prevent intel_pstate_update_util() from using stale data. */
- cpu->sample.time = 0;
- cpufreq_add_update_util_hook(cpu_num, &cpu->update_util,
-@@ -1458,15 +1443,15 @@
- if (!policy->cpuinfo.max_freq)
- return -ENODEV;
- 
-- intel_pstate_clear_update_util_hook(policy->cpu);
-+ pr_debug("set_policy cpuinfo.max %u policy->max %u\n",
-+ policy->cpuinfo.max_freq, policy->max);
- 
- cpu = all_cpu_data[0];
-- if (cpu->pstate.max_pstate_physical > cpu->pstate.max_pstate) {
-- if (policy->max < policy->cpuinfo.max_freq &&
-- policy->max > cpu->pstate.max_pstate * cpu->pstate.scaling) {
-- pr_debug("policy->max > max non turbo frequency\n");
-- policy->max = policy->cpuinfo.max_freq;
-- }
-+ if (cpu->pstate.max_pstate_physical > cpu->pstate.max_pstate &&
-+ policy->max < policy->cpuinfo.max_freq &&
-+ policy->max > cpu->pstate.max_pstate * cpu->pstate.scaling) {
-+ pr_debug("policy->max > max non turbo frequency\n");
-+ policy->max = policy->cpuinfo.max_freq;
- }
- 
- if (policy->policy == CPUFREQ_POLICY_PERFORMANCE) {
-@@ -1496,13 +1481,13 @@
- limits->max_sysfs_pct);
- limits->max_perf_pct = max(limits->min_policy_pct,
- limits->max_perf_pct);
-- limits->max_perf = round_up(limits->max_perf, FRAC_BITS);
- 
- /* Make sure min_perf_pct <= max_perf_pct */
- limits->min_perf_pct = min(limits->max_perf_pct, limits->min_perf_pct);
- 
- limits->min_perf = div_fp(limits->min_perf_pct, 100);
- limits->max_perf = div_fp(limits->max_perf_pct, 100);
-+ limits->max_perf = round_up(limits->max_perf, FRAC_BITS);
- 
- out:
- intel_pstate_set_update_util_hook(policy->cpu);
-@@ -1559,8 +1544,11 @@
- 
- /* cpuinfo and default policy values */
- policy->cpuinfo.min_freq = cpu->pstate.min_pstate * cpu->pstate.scaling;
-- policy->cpuinfo.max_freq =
-- cpu->pstate.turbo_pstate * cpu->pstate.scaling;
-+ update_turbo_state();
-+ policy->cpuinfo.max_freq = limits->turbo_disabled ?
-+ cpu->pstate.max_pstate : cpu->pstate.turbo_pstate;
-+ policy->cpuinfo.max_freq *= cpu->pstate.scaling;
-+
- intel_pstate_init_acpi_perf_limits(policy);
- policy->cpuinfo.transition_latency = CPUFREQ_ETERNAL;
- cpumask_set_cpu(policy->cpu, policy->cpus);
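The heart of the 4.6 backport above is the switch from the percent-based core_pct_busy to a 14-bit fixed-point core_avg_perf (EXT_FRAC_BITS = EXT_BITS + FRAC_BITS = 6 + 8). The sketch below is a minimal userspace rendering of that arithmetic, not the kernel code; the APERF/MPERF deltas and the 24-step, 100 MHz-per-step platform numbers are made-up example values.

/*
 * Standalone sketch of the patch's extended fixed-point helpers.
 * EXT_FRAC_BITS = 14, so a ratio of 1.0 is represented as 1 << 14.
 */
#include <stdint.h>
#include <stdio.h>

#define FRAC_BITS     8
#define EXT_BITS      6
#define EXT_FRAC_BITS (EXT_BITS + FRAC_BITS)

static uint64_t mul_ext_fp(uint64_t x, uint64_t y)
{
	return (x * y) >> EXT_FRAC_BITS;
}

static uint64_t div_ext_fp(uint64_t x, uint64_t y)
{
	/* the kernel uses div64_u64(); plain division is fine in userspace */
	return (x << EXT_FRAC_BITS) / y;
}

int main(void)
{
	/* hypothetical per-sample MSR deltas */
	uint64_t aperf = 180000, mperf = 200000;

	/* core_avg_perf = APERF/MPERF in 14-bit fixed point */
	uint64_t core_avg_perf = div_ext_fp(aperf, mperf);

	/* hypothetical platform: max non-turbo pstate 24, 100000 kHz per step */
	int max_pstate_physical = 24, scaling = 100000;

	/* what get_avg_frequency() computes: average frequency, in kHz */
	uint64_t avg_khz = mul_ext_fp(core_avg_perf,
				      (uint64_t)max_pstate_physical * scaling);

	printf("core_avg_perf = %.3f, avg freq = %llu kHz\n",
	       (double)core_avg_perf / (1 << EXT_FRAC_BITS),
	       (unsigned long long)avg_khz);
	return 0;
}

With the 0.9 APERF/MPERF ratio above this prints roughly 2160000 kHz, i.e. 0.9 times the hypothetical 2.4 GHz maximum, matching what the patched get_avg_frequency() would report.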
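Two smaller bit-level details in the diff are easy to miss. The old patch's convert_to_native_pstate_format() keeps only bits 15:8 of a _PSS control value (the comment's own example: 0x1700 becomes target ratio 0x17), and one 4.7 hunk masks the value read from MSR_CONFIG_TDP_CONTROL down to its two valid bits before using it as an offset from MSR_CONFIG_TDP_NOMINAL. A standalone sketch with example values, assuming 0x648 as the MSR_CONFIG_TDP_NOMINAL address:

#include <stdint.h>
#include <stdio.h>

/* _PSS control values use the PERF_CTL layout: target ratio in bits 15:8. */
static int pss_control_to_pstate(uint32_t control)
{
	return control >> 8;	/* e.g. 0x1700 -> 0x17 */
}

/*
 * Clamp the TDP-level selector to its two architecturally valid bits so a
 * stray reserved bit cannot select an unrelated MSR (the "& 0x3" fix above).
 */
static uint32_t config_tdp_msr(uint32_t nominal_msr, uint64_t tdp_ctrl)
{
	return nominal_msr + (uint32_t)(tdp_ctrl & 0x3);
}

int main(void)
{
	printf("0x1700 -> pstate 0x%x\n", pss_control_to_pstate(0x1700));
	printf("tdp msr: 0x%x\n", config_tdp_msr(0x648, 0x1)); /* 0x649 */
	return 0;
}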
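Since everything above hinges on APERF/MPERF deltas, it can be useful to sample the same ratio from userspace when checking what the driver is doing. A rough sketch using the msr driver; it assumes root, a loaded msr module, and CPU 0, with error handling kept minimal:

#include <fcntl.h>
#include <stdint.h>
#include <stdio.h>
#include <unistd.h>

#define MSR_IA32_MPERF 0xe7
#define MSR_IA32_APERF 0xe8

static uint64_t rdmsr0(int fd, off_t reg)
{
	uint64_t v = 0;
	pread(fd, &v, sizeof(v), reg);	/* offset selects the MSR */
	return v;
}

int main(void)
{
	int fd = open("/dev/cpu/0/msr", O_RDONLY);
	if (fd < 0) {
		perror("open /dev/cpu/0/msr");
		return 1;
	}
	uint64_t m0 = rdmsr0(fd, MSR_IA32_MPERF);
	uint64_t a0 = rdmsr0(fd, MSR_IA32_APERF);
	sleep(1);
	uint64_t m1 = rdmsr0(fd, MSR_IA32_MPERF);
	uint64_t a1 = rdmsr0(fd, MSR_IA32_APERF);
	close(fd);

	/* analogue of core_avg_perf: actual vs. reference cycles delivered */
	printf("APERF/MPERF = %.3f\n", (double)(a1 - a0) / (double)(m1 - m0));
	return 0;
}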