diff -Naur a/arch/x86/include/asm/idle.h b/arch/x86/include/asm/idle.h --- a/arch/x86/include/asm/idle.h 2016-05-01 23:53:53.784424167 +1000 +++ b/arch/x86/include/asm/idle.h 2016-05-01 23:54:08.214652936 +1000 @@ -1,13 +1,6 @@ #ifndef _ASM_X86_IDLE_H #define _ASM_X86_IDLE_H -#define IDLE_START 1 -#define IDLE_END 2 - -struct notifier_block; -void idle_notifier_register(struct notifier_block *n); -void idle_notifier_unregister(struct notifier_block *n); - #ifdef CONFIG_X86_64 void enter_idle(void); void exit_idle(void); diff -Naur a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c --- a/arch/x86/kernel/process.c 2016-05-01 23:53:57.111143565 +1000 +++ b/arch/x86/kernel/process.c 2016-05-01 23:54:08.216319629 +1000 @@ -62,19 +62,6 @@ #ifdef CONFIG_X86_64 static DEFINE_PER_CPU(unsigned char, is_idle); -static ATOMIC_NOTIFIER_HEAD(idle_notifier); - -void idle_notifier_register(struct notifier_block *n) -{ - atomic_notifier_chain_register(&idle_notifier, n); -} -EXPORT_SYMBOL_GPL(idle_notifier_register); - -void idle_notifier_unregister(struct notifier_block *n) -{ - atomic_notifier_chain_unregister(&idle_notifier, n); -} -EXPORT_SYMBOL_GPL(idle_notifier_unregister); #endif /* @@ -251,14 +238,14 @@ void enter_idle(void) { this_cpu_write(is_idle, 1); - atomic_notifier_call_chain(&idle_notifier, IDLE_START, NULL); + idle_notifier_call_chain(IDLE_START); } static void __exit_idle(void) { if (x86_test_and_clear_bit_percpu(0, is_idle) == 0) return; - atomic_notifier_call_chain(&idle_notifier, IDLE_END, NULL); + idle_notifier_call_chain(IDLE_END); } /* Called from interrupts to signify idle end */ diff -Naur a/Documentation/cpu-freq/governors.txt b/Documentation/cpu-freq/governors.txt --- a/Documentation/cpu-freq/governors.txt 2016-05-01 23:54:17.309797175 +1000 +++ b/Documentation/cpu-freq/governors.txt 2016-05-01 23:54:20.669850471 +1000 @@ -28,6 +28,7 @@ 2.3 Userspace 2.4 Ondemand 2.5 Conservative +2.6 Interactive 3. The Governor Interface in the CPUfreq Core @@ -218,6 +219,90 @@ speed. Load for frequency increase is still evaluated every sampling rate. +2.6 Interactive +--------------- + +The CPUfreq governor "interactive" is designed for latency-sensitive, +interactive workloads. This governor sets the CPU speed depending on +usage, similar to "ondemand" and "conservative" governors, but with a +different set of configurable behaviors. + +The tuneable values for this governor are: + +target_loads: CPU load values used to adjust speed to influence the +current CPU load toward that value. In general, the lower the target +load, the more often the governor will raise CPU speeds to bring load +below the target. The format is a single target load, optionally +followed by pairs of CPU speeds and CPU loads to target at or above +those speeds. Colons can be used between the speeds and associated +target loads for readability. For example: + + 85 1000000:90 1700000:99 + +targets CPU load 85% below speed 1GHz, 90% at or above 1GHz, until +1.7GHz and above, at which load 99% is targeted. If speeds are +specified these must appear in ascending order. Higher target load +values are typically specified for higher speeds, that is, target load +values also usually appear in an ascending order. The default is +target load 90% for all speeds. + +min_sample_time: The minimum amount of time to spend at the current +frequency before ramping down. Default is 80000 uS. + +hispeed_freq: An intermediate "hi speed" at which to initially ramp +when CPU load hits the value specified in go_hispeed_load. If load +stays high for the amount of time specified in above_hispeed_delay, +then speed may be bumped higher. Default is the maximum speed +allowed by the policy at governor initialization time. + +go_hispeed_load: The CPU load at which to ramp to hispeed_freq. +Default is 99%. + +above_hispeed_delay: When speed is at or above hispeed_freq, wait for +this long before raising speed in response to continued high load. +The format is a single delay value, optionally followed by pairs of +CPU speeds and the delay to use at or above those speeds. Colons can +be used between the speeds and associated delays for readability. For +example: + + 80000 1300000:200000 1500000:40000 + +uses delay 80000 uS until CPU speed 1.3 GHz, at which speed delay +200000 uS is used until speed 1.5 GHz, at which speed (and above) +delay 40000 uS is used. If speeds are specified these must appear in +ascending order. Default is 20000 uS. + +timer_rate: Sample rate for reevaluating CPU load when the CPU is not +idle. A deferrable timer is used, such that the CPU will not be woken +from idle to service this timer until something else needs to run. +(The maximum time to allow deferring this timer when not running at +minimum speed is configurable via timer_slack.) Default is 20000 uS. + +timer_slack: Maximum additional time to defer handling the governor +sampling timer beyond timer_rate when running at speeds above the +minimum. For platforms that consume additional power at idle when +CPUs are running at speeds greater than minimum, this places an upper +bound on how long the timer will be deferred prior to re-evaluating +load and dropping speed. For example, if timer_rate is 20000uS and +timer_slack is 10000uS then timers will be deferred for up to 30msec +when not at lowest speed. A value of -1 means defer timers +indefinitely at all speeds. Default is 80000 uS. + +boost: If non-zero, immediately boost speed of all CPUs to at least +hispeed_freq until zero is written to this attribute. If zero, allow +CPU speeds to drop below hispeed_freq according to load as usual. +Default is zero. + +boostpulse: On each write, immediately boost speed of all CPUs to +hispeed_freq for at least the period of time specified by +boostpulse_duration, after which speeds are allowed to drop below +hispeed_freq according to load as usual. + +boostpulse_duration: Length of time to hold CPU speed at hispeed_freq +on a write to boostpulse, before allowing speed to drop according to +load as usual. Default is 80000 uS. + + 3. The Governor Interface in the CPUfreq Core ============================================= diff -Naur a/drivers/cpufreq/cpufreq_interactive.c b/drivers/cpufreq/cpufreq_interactive.c --- a/drivers/cpufreq/cpufreq_interactive.c 1970-01-01 10:00:00.000000000 +1000 +++ b/drivers/cpufreq/cpufreq_interactive.c 2016-05-01 23:54:11.309702016 +1000 @@ -0,0 +1,1360 @@ +/* + * drivers/cpufreq/cpufreq_interactive.c + * + * Copyright (C) 2010 Google, Inc. + * + * This software is licensed under the terms of the GNU General Public + * License version 2, as published by the Free Software Foundation, and + * may be copied, distributed, and modified under those terms. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * Author: Mike Chan (mike@android.com) + * + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#define CREATE_TRACE_POINTS +#include + +struct cpufreq_interactive_cpuinfo { + struct timer_list cpu_timer; + struct timer_list cpu_slack_timer; + spinlock_t load_lock; /* protects the next 4 fields */ + u64 time_in_idle; + u64 time_in_idle_timestamp; + u64 cputime_speedadj; + u64 cputime_speedadj_timestamp; + struct cpufreq_policy *policy; + struct cpufreq_frequency_table *freq_table; + spinlock_t target_freq_lock; /*protects target freq */ + unsigned int target_freq; + unsigned int floor_freq; + u64 pol_floor_val_time; /* policy floor_validate_time */ + u64 loc_floor_val_time; /* per-cpu floor_validate_time */ + u64 pol_hispeed_val_time; /* policy hispeed_validate_time */ + u64 loc_hispeed_val_time; /* per-cpu hispeed_validate_time */ + struct rw_semaphore enable_sem; + int governor_enabled; +}; + +static DEFINE_PER_CPU(struct cpufreq_interactive_cpuinfo, cpuinfo); + +/* realtime thread handles frequency scaling */ +static struct task_struct *speedchange_task; +static cpumask_t speedchange_cpumask; +static spinlock_t speedchange_cpumask_lock; +static struct mutex gov_lock; + +/* Target load. Lower values result in higher CPU speeds. */ +#define DEFAULT_TARGET_LOAD 90 +static unsigned int default_target_loads[] = {DEFAULT_TARGET_LOAD}; + +#define DEFAULT_TIMER_RATE (20 * USEC_PER_MSEC) +#define DEFAULT_ABOVE_HISPEED_DELAY DEFAULT_TIMER_RATE +static unsigned int default_above_hispeed_delay[] = { + DEFAULT_ABOVE_HISPEED_DELAY }; + +struct cpufreq_interactive_tunables { + int usage_count; + /* Hi speed to bump to from lo speed when load burst (default max) */ + unsigned int hispeed_freq; + /* Go to hi speed when CPU load at or above this value. */ +#define DEFAULT_GO_HISPEED_LOAD 99 + unsigned long go_hispeed_load; + /* Target load. Lower values result in higher CPU speeds. */ + spinlock_t target_loads_lock; + unsigned int *target_loads; + int ntarget_loads; + /* + * The minimum amount of time to spend at a frequency before we can ramp + * down. + */ +#define DEFAULT_MIN_SAMPLE_TIME (80 * USEC_PER_MSEC) + unsigned long min_sample_time; + /* + * The sample rate of the timer used to increase frequency + */ + unsigned long timer_rate; + /* + * Wait this long before raising speed above hispeed, by default a + * single timer interval. + */ + spinlock_t above_hispeed_delay_lock; + unsigned int *above_hispeed_delay; + int nabove_hispeed_delay; + /* Non-zero means indefinite speed boost active */ + int boost_val; + /* Duration of a boot pulse in usecs */ + int boostpulse_duration_val; + /* End time of boost pulse in ktime converted to usecs */ + u64 boostpulse_endtime; + bool boosted; + /* + * Max additional time to wait in idle, beyond timer_rate, at speeds + * above minimum before wakeup to reduce speed, or -1 if unnecessary. + */ +#define DEFAULT_TIMER_SLACK (4 * DEFAULT_TIMER_RATE) + int timer_slack_val; + bool io_is_busy; +}; + +/* + * HACK: FIXME: Bring back cpufreq_{get,put}_global_kobject() + * definition removed by upstream commit 8eec1020f0c0 "cpufreq: + * create cpu/cpufreq at boot time" to fix build failures. + */ +static int cpufreq_global_kobject_usage; + +int cpufreq_get_global_kobject(void) +{ + if (!cpufreq_global_kobject_usage++) + return kobject_add(cpufreq_global_kobject, + &cpu_subsys.dev_root->kobj, "%s", "cpufreq"); + + return 0; +} + +void cpufreq_put_global_kobject(void) +{ + if (!--cpufreq_global_kobject_usage) + kobject_del(cpufreq_global_kobject); +} + +/* For cases where we have single governor instance for system */ +static struct cpufreq_interactive_tunables *common_tunables; + +static struct attribute_group *get_sysfs_attr(void); + +static void cpufreq_interactive_timer_resched( + struct cpufreq_interactive_cpuinfo *pcpu) +{ + struct cpufreq_interactive_tunables *tunables = + pcpu->policy->governor_data; + unsigned long expires; + unsigned long flags; + + spin_lock_irqsave(&pcpu->load_lock, flags); + pcpu->time_in_idle = + get_cpu_idle_time(smp_processor_id(), + &pcpu->time_in_idle_timestamp, + tunables->io_is_busy); + pcpu->cputime_speedadj = 0; + pcpu->cputime_speedadj_timestamp = pcpu->time_in_idle_timestamp; + expires = jiffies + usecs_to_jiffies(tunables->timer_rate); + mod_timer_pinned(&pcpu->cpu_timer, expires); + + if (tunables->timer_slack_val >= 0 && + pcpu->target_freq > pcpu->policy->min) { + expires += usecs_to_jiffies(tunables->timer_slack_val); + mod_timer_pinned(&pcpu->cpu_slack_timer, expires); + } + + spin_unlock_irqrestore(&pcpu->load_lock, flags); +} + +/* The caller shall take enable_sem write semaphore to avoid any timer race. + * The cpu_timer and cpu_slack_timer must be deactivated when calling this + * function. + */ +static void cpufreq_interactive_timer_start( + struct cpufreq_interactive_tunables *tunables, int cpu) +{ + struct cpufreq_interactive_cpuinfo *pcpu = &per_cpu(cpuinfo, cpu); + unsigned long expires = jiffies + + usecs_to_jiffies(tunables->timer_rate); + unsigned long flags; + + pcpu->cpu_timer.expires = expires; + add_timer_on(&pcpu->cpu_timer, cpu); + if (tunables->timer_slack_val >= 0 && + pcpu->target_freq > pcpu->policy->min) { + expires += usecs_to_jiffies(tunables->timer_slack_val); + pcpu->cpu_slack_timer.expires = expires; + add_timer_on(&pcpu->cpu_slack_timer, cpu); + } + + spin_lock_irqsave(&pcpu->load_lock, flags); + pcpu->time_in_idle = + get_cpu_idle_time(cpu, &pcpu->time_in_idle_timestamp, + tunables->io_is_busy); + pcpu->cputime_speedadj = 0; + pcpu->cputime_speedadj_timestamp = pcpu->time_in_idle_timestamp; + spin_unlock_irqrestore(&pcpu->load_lock, flags); +} + +static unsigned int freq_to_above_hispeed_delay( + struct cpufreq_interactive_tunables *tunables, + unsigned int freq) +{ + int i; + unsigned int ret; + unsigned long flags; + + spin_lock_irqsave(&tunables->above_hispeed_delay_lock, flags); + + for (i = 0; i < tunables->nabove_hispeed_delay - 1 && + freq >= tunables->above_hispeed_delay[i+1]; i += 2) + ; + + ret = tunables->above_hispeed_delay[i]; + spin_unlock_irqrestore(&tunables->above_hispeed_delay_lock, flags); + return ret; +} + +static unsigned int freq_to_targetload( + struct cpufreq_interactive_tunables *tunables, unsigned int freq) +{ + int i; + unsigned int ret; + unsigned long flags; + + spin_lock_irqsave(&tunables->target_loads_lock, flags); + + for (i = 0; i < tunables->ntarget_loads - 1 && + freq >= tunables->target_loads[i+1]; i += 2) + ; + + ret = tunables->target_loads[i]; + spin_unlock_irqrestore(&tunables->target_loads_lock, flags); + return ret; +} + +/* + * If increasing frequencies never map to a lower target load then + * choose_freq() will find the minimum frequency that does not exceed its + * target load given the current load. + */ +static unsigned int choose_freq(struct cpufreq_interactive_cpuinfo *pcpu, + unsigned int loadadjfreq) +{ + unsigned int freq = pcpu->policy->cur; + unsigned int prevfreq, freqmin, freqmax; + unsigned int tl; + int index; + + freqmin = 0; + freqmax = UINT_MAX; + + do { + prevfreq = freq; + tl = freq_to_targetload(pcpu->policy->governor_data, freq); + + /* + * Find the lowest frequency where the computed load is less + * than or equal to the target load. + */ + + if (cpufreq_frequency_table_target( + pcpu->policy, pcpu->freq_table, loadadjfreq / tl, + CPUFREQ_RELATION_L, &index)) + break; + freq = pcpu->freq_table[index].frequency; + + if (freq > prevfreq) { + /* The previous frequency is too low. */ + freqmin = prevfreq; + + if (freq >= freqmax) { + /* + * Find the highest frequency that is less + * than freqmax. + */ + if (cpufreq_frequency_table_target( + pcpu->policy, pcpu->freq_table, + freqmax - 1, CPUFREQ_RELATION_H, + &index)) + break; + freq = pcpu->freq_table[index].frequency; + + if (freq == freqmin) { + /* + * The first frequency below freqmax + * has already been found to be too + * low. freqmax is the lowest speed + * we found that is fast enough. + */ + freq = freqmax; + break; + } + } + } else if (freq < prevfreq) { + /* The previous frequency is high enough. */ + freqmax = prevfreq; + + if (freq <= freqmin) { + /* + * Find the lowest frequency that is higher + * than freqmin. + */ + if (cpufreq_frequency_table_target( + pcpu->policy, pcpu->freq_table, + freqmin + 1, CPUFREQ_RELATION_L, + &index)) + break; + freq = pcpu->freq_table[index].frequency; + + /* + * If freqmax is the first frequency above + * freqmin then we have already found that + * this speed is fast enough. + */ + if (freq == freqmax) + break; + } + } + + /* If same frequency chosen as previous then done. */ + } while (freq != prevfreq); + + return freq; +} + +static u64 update_load(int cpu) +{ + struct cpufreq_interactive_cpuinfo *pcpu = &per_cpu(cpuinfo, cpu); + struct cpufreq_interactive_tunables *tunables = + pcpu->policy->governor_data; + u64 now; + u64 now_idle; + unsigned int delta_idle; + unsigned int delta_time; + u64 active_time; + + now_idle = get_cpu_idle_time(cpu, &now, tunables->io_is_busy); + delta_idle = (unsigned int)(now_idle - pcpu->time_in_idle); + delta_time = (unsigned int)(now - pcpu->time_in_idle_timestamp); + + if (delta_time <= delta_idle) + active_time = 0; + else + active_time = delta_time - delta_idle; + + pcpu->cputime_speedadj += active_time * pcpu->policy->cur; + + pcpu->time_in_idle = now_idle; + pcpu->time_in_idle_timestamp = now; + return now; +} + +static void cpufreq_interactive_timer(unsigned long data) +{ + u64 now; + unsigned int delta_time; + u64 cputime_speedadj; + int cpu_load; + struct cpufreq_interactive_cpuinfo *pcpu = + &per_cpu(cpuinfo, data); + struct cpufreq_interactive_tunables *tunables = + pcpu->policy->governor_data; + unsigned int new_freq; + unsigned int loadadjfreq; + unsigned int index; + unsigned long flags; + u64 max_fvtime; + + if (!down_read_trylock(&pcpu->enable_sem)) + return; + if (!pcpu->governor_enabled) + goto exit; + + spin_lock_irqsave(&pcpu->load_lock, flags); + now = update_load(data); + delta_time = (unsigned int)(now - pcpu->cputime_speedadj_timestamp); + cputime_speedadj = pcpu->cputime_speedadj; + spin_unlock_irqrestore(&pcpu->load_lock, flags); + + if (WARN_ON_ONCE(!delta_time)) + goto rearm; + + spin_lock_irqsave(&pcpu->target_freq_lock, flags); + do_div(cputime_speedadj, delta_time); + loadadjfreq = (unsigned int)cputime_speedadj * 100; + cpu_load = loadadjfreq / pcpu->policy->cur; + tunables->boosted = tunables->boost_val || now < tunables->boostpulse_endtime; + + if (cpu_load >= tunables->go_hispeed_load || tunables->boosted) { + if (pcpu->policy->cur < tunables->hispeed_freq) { + new_freq = tunables->hispeed_freq; + } else { + new_freq = choose_freq(pcpu, loadadjfreq); + + if (new_freq < tunables->hispeed_freq) + new_freq = tunables->hispeed_freq; + } + } else { + new_freq = choose_freq(pcpu, loadadjfreq); + if (new_freq > tunables->hispeed_freq && + pcpu->policy->cur < tunables->hispeed_freq) + new_freq = tunables->hispeed_freq; + } + + if (pcpu->policy->cur >= tunables->hispeed_freq && + new_freq > pcpu->policy->cur && + now - pcpu->pol_hispeed_val_time < + freq_to_above_hispeed_delay(tunables, pcpu->policy->cur)) { + trace_cpufreq_interactive_notyet( + data, cpu_load, pcpu->target_freq, + pcpu->policy->cur, new_freq); + spin_unlock_irqrestore(&pcpu->target_freq_lock, flags); + goto rearm; + } + + pcpu->loc_hispeed_val_time = now; + + if (cpufreq_frequency_table_target(pcpu->policy, pcpu->freq_table, + new_freq, CPUFREQ_RELATION_L, + &index)) { + spin_unlock_irqrestore(&pcpu->target_freq_lock, flags); + goto rearm; + } + + new_freq = pcpu->freq_table[index].frequency; + + /* + * Do not scale below floor_freq unless we have been at or above the + * floor frequency for the minimum sample time since last validated. + */ + max_fvtime = max(pcpu->pol_floor_val_time, pcpu->loc_floor_val_time); + if (new_freq < pcpu->floor_freq && + pcpu->target_freq >= pcpu->policy->cur) { + if (now - max_fvtime < tunables->min_sample_time) { + trace_cpufreq_interactive_notyet( + data, cpu_load, pcpu->target_freq, + pcpu->policy->cur, new_freq); + spin_unlock_irqrestore(&pcpu->target_freq_lock, flags); + goto rearm; + } + } + + /* + * Update the timestamp for checking whether speed has been held at + * or above the selected frequency for a minimum of min_sample_time, + * if not boosted to hispeed_freq. If boosted to hispeed_freq then we + * allow the speed to drop as soon as the boostpulse duration expires + * (or the indefinite boost is turned off). + */ + + if (!tunables->boosted || new_freq > tunables->hispeed_freq) { + pcpu->floor_freq = new_freq; + if (pcpu->target_freq >= pcpu->policy->cur || + new_freq >= pcpu->policy->cur) + pcpu->loc_floor_val_time = now; + } + + if (pcpu->target_freq == new_freq && + pcpu->target_freq <= pcpu->policy->cur) { + trace_cpufreq_interactive_already( + data, cpu_load, pcpu->target_freq, + pcpu->policy->cur, new_freq); + spin_unlock_irqrestore(&pcpu->target_freq_lock, flags); + goto rearm; + } + + trace_cpufreq_interactive_target(data, cpu_load, pcpu->target_freq, + pcpu->policy->cur, new_freq); + + pcpu->target_freq = new_freq; + spin_unlock_irqrestore(&pcpu->target_freq_lock, flags); + spin_lock_irqsave(&speedchange_cpumask_lock, flags); + cpumask_set_cpu(data, &speedchange_cpumask); + spin_unlock_irqrestore(&speedchange_cpumask_lock, flags); + wake_up_process(speedchange_task); + +rearm: + if (!timer_pending(&pcpu->cpu_timer)) + cpufreq_interactive_timer_resched(pcpu); + +exit: + up_read(&pcpu->enable_sem); + return; +} + +static void cpufreq_interactive_idle_end(void) +{ + struct cpufreq_interactive_cpuinfo *pcpu = + &per_cpu(cpuinfo, smp_processor_id()); + + if (!down_read_trylock(&pcpu->enable_sem)) + return; + if (!pcpu->governor_enabled) { + up_read(&pcpu->enable_sem); + return; + } + + /* Arm the timer for 1-2 ticks later if not already. */ + if (!timer_pending(&pcpu->cpu_timer)) { + cpufreq_interactive_timer_resched(pcpu); + } else if (time_after_eq(jiffies, pcpu->cpu_timer.expires)) { + del_timer(&pcpu->cpu_timer); + del_timer(&pcpu->cpu_slack_timer); + cpufreq_interactive_timer(smp_processor_id()); + } + + up_read(&pcpu->enable_sem); +} + +static int cpufreq_interactive_speedchange_task(void *data) +{ + unsigned int cpu; + cpumask_t tmp_mask; + unsigned long flags; + struct cpufreq_interactive_cpuinfo *pcpu; + + while (1) { + set_current_state(TASK_INTERRUPTIBLE); + spin_lock_irqsave(&speedchange_cpumask_lock, flags); + + if (cpumask_empty(&speedchange_cpumask)) { + spin_unlock_irqrestore(&speedchange_cpumask_lock, + flags); + schedule(); + + if (kthread_should_stop()) + break; + + spin_lock_irqsave(&speedchange_cpumask_lock, flags); + } + + set_current_state(TASK_RUNNING); + tmp_mask = speedchange_cpumask; + cpumask_clear(&speedchange_cpumask); + spin_unlock_irqrestore(&speedchange_cpumask_lock, flags); + + for_each_cpu(cpu, &tmp_mask) { + unsigned int j; + unsigned int max_freq = 0; + struct cpufreq_interactive_cpuinfo *pjcpu; + u64 hvt = ~0ULL, fvt = 0; + + pcpu = &per_cpu(cpuinfo, cpu); + if (!down_read_trylock(&pcpu->enable_sem)) + continue; + if (!pcpu->governor_enabled) { + up_read(&pcpu->enable_sem); + continue; + } + + for_each_cpu(j, pcpu->policy->cpus) { + pjcpu = &per_cpu(cpuinfo, j); + + fvt = max(fvt, pjcpu->loc_floor_val_time); + if (pjcpu->target_freq > max_freq) { + max_freq = pjcpu->target_freq; + hvt = pjcpu->loc_hispeed_val_time; + } else if (pjcpu->target_freq == max_freq) { + hvt = min(hvt, pjcpu->loc_hispeed_val_time); + } + } + for_each_cpu(j, pcpu->policy->cpus) { + pjcpu = &per_cpu(cpuinfo, j); + pjcpu->pol_floor_val_time = fvt; + } + + if (max_freq != pcpu->policy->cur) { + __cpufreq_driver_target(pcpu->policy, + max_freq, + CPUFREQ_RELATION_H); + for_each_cpu(j, pcpu->policy->cpus) { + pjcpu = &per_cpu(cpuinfo, j); + pjcpu->pol_hispeed_val_time = hvt; + } + } + trace_cpufreq_interactive_setspeed(cpu, + pcpu->target_freq, + pcpu->policy->cur); + + up_read(&pcpu->enable_sem); + } + } + + return 0; +} + +static void cpufreq_interactive_boost(struct cpufreq_interactive_tunables *tunables) +{ + int i; + int anyboost = 0; + unsigned long flags[2]; + struct cpufreq_interactive_cpuinfo *pcpu; + + tunables->boosted = true; + + spin_lock_irqsave(&speedchange_cpumask_lock, flags[0]); + + for_each_online_cpu(i) { + pcpu = &per_cpu(cpuinfo, i); + if (tunables != pcpu->policy->governor_data) + continue; + + spin_lock_irqsave(&pcpu->target_freq_lock, flags[1]); + if (pcpu->target_freq < tunables->hispeed_freq) { + pcpu->target_freq = tunables->hispeed_freq; + cpumask_set_cpu(i, &speedchange_cpumask); + pcpu->pol_hispeed_val_time = + ktime_to_us(ktime_get()); + anyboost = 1; + } + spin_unlock_irqrestore(&pcpu->target_freq_lock, flags[1]); + } + + spin_unlock_irqrestore(&speedchange_cpumask_lock, flags[0]); + + if (anyboost) + wake_up_process(speedchange_task); +} + +static int cpufreq_interactive_notifier( + struct notifier_block *nb, unsigned long val, void *data) +{ + struct cpufreq_freqs *freq = data; + struct cpufreq_interactive_cpuinfo *pcpu; + int cpu; + unsigned long flags; + + if (val == CPUFREQ_POSTCHANGE) { + pcpu = &per_cpu(cpuinfo, freq->cpu); + if (!down_read_trylock(&pcpu->enable_sem)) + return 0; + if (!pcpu->governor_enabled) { + up_read(&pcpu->enable_sem); + return 0; + } + + for_each_cpu(cpu, pcpu->policy->cpus) { + struct cpufreq_interactive_cpuinfo *pjcpu = + &per_cpu(cpuinfo, cpu); + if (cpu != freq->cpu) { + if (!down_read_trylock(&pjcpu->enable_sem)) + continue; + if (!pjcpu->governor_enabled) { + up_read(&pjcpu->enable_sem); + continue; + } + } + spin_lock_irqsave(&pjcpu->load_lock, flags); + update_load(cpu); + spin_unlock_irqrestore(&pjcpu->load_lock, flags); + if (cpu != freq->cpu) + up_read(&pjcpu->enable_sem); + } + + up_read(&pcpu->enable_sem); + } + return 0; +} + +static struct notifier_block cpufreq_notifier_block = { + .notifier_call = cpufreq_interactive_notifier, +}; + +static unsigned int *get_tokenized_data(const char *buf, int *num_tokens) +{ + const char *cp; + int i; + int ntokens = 1; + unsigned int *tokenized_data; + int err = -EINVAL; + + cp = buf; + while ((cp = strpbrk(cp + 1, " :"))) + ntokens++; + + if (!(ntokens & 0x1)) + goto err; + + tokenized_data = kmalloc(ntokens * sizeof(unsigned int), GFP_KERNEL); + if (!tokenized_data) { + err = -ENOMEM; + goto err; + } + + cp = buf; + i = 0; + while (i < ntokens) { + if (sscanf(cp, "%u", &tokenized_data[i++]) != 1) + goto err_kfree; + + cp = strpbrk(cp, " :"); + if (!cp) + break; + cp++; + } + + if (i != ntokens) + goto err_kfree; + + *num_tokens = ntokens; + return tokenized_data; + +err_kfree: + kfree(tokenized_data); +err: + return ERR_PTR(err); +} + +static ssize_t show_target_loads( + struct cpufreq_interactive_tunables *tunables, + char *buf) +{ + int i; + ssize_t ret = 0; + unsigned long flags; + + spin_lock_irqsave(&tunables->target_loads_lock, flags); + + for (i = 0; i < tunables->ntarget_loads; i++) + ret += sprintf(buf + ret, "%u%s", tunables->target_loads[i], + i & 0x1 ? ":" : " "); + + sprintf(buf + ret - 1, "\n"); + spin_unlock_irqrestore(&tunables->target_loads_lock, flags); + return ret; +} + +static ssize_t store_target_loads( + struct cpufreq_interactive_tunables *tunables, + const char *buf, size_t count) +{ + int ntokens; + unsigned int *new_target_loads = NULL; + unsigned long flags; + + new_target_loads = get_tokenized_data(buf, &ntokens); + if (IS_ERR(new_target_loads)) + return PTR_RET(new_target_loads); + + spin_lock_irqsave(&tunables->target_loads_lock, flags); + if (tunables->target_loads != default_target_loads) + kfree(tunables->target_loads); + tunables->target_loads = new_target_loads; + tunables->ntarget_loads = ntokens; + spin_unlock_irqrestore(&tunables->target_loads_lock, flags); + return count; +} + +static ssize_t show_above_hispeed_delay( + struct cpufreq_interactive_tunables *tunables, char *buf) +{ + int i; + ssize_t ret = 0; + unsigned long flags; + + spin_lock_irqsave(&tunables->above_hispeed_delay_lock, flags); + + for (i = 0; i < tunables->nabove_hispeed_delay; i++) + ret += sprintf(buf + ret, "%u%s", + tunables->above_hispeed_delay[i], + i & 0x1 ? ":" : " "); + + sprintf(buf + ret - 1, "\n"); + spin_unlock_irqrestore(&tunables->above_hispeed_delay_lock, flags); + return ret; +} + +static ssize_t store_above_hispeed_delay( + struct cpufreq_interactive_tunables *tunables, + const char *buf, size_t count) +{ + int ntokens; + unsigned int *new_above_hispeed_delay = NULL; + unsigned long flags; + + new_above_hispeed_delay = get_tokenized_data(buf, &ntokens); + if (IS_ERR(new_above_hispeed_delay)) + return PTR_RET(new_above_hispeed_delay); + + spin_lock_irqsave(&tunables->above_hispeed_delay_lock, flags); + if (tunables->above_hispeed_delay != default_above_hispeed_delay) + kfree(tunables->above_hispeed_delay); + tunables->above_hispeed_delay = new_above_hispeed_delay; + tunables->nabove_hispeed_delay = ntokens; + spin_unlock_irqrestore(&tunables->above_hispeed_delay_lock, flags); + return count; + +} + +static ssize_t show_hispeed_freq(struct cpufreq_interactive_tunables *tunables, + char *buf) +{ + return sprintf(buf, "%u\n", tunables->hispeed_freq); +} + +static ssize_t store_hispeed_freq(struct cpufreq_interactive_tunables *tunables, + const char *buf, size_t count) +{ + int ret; + long unsigned int val; + + ret = kstrtoul(buf, 0, &val); + if (ret < 0) + return ret; + tunables->hispeed_freq = val; + return count; +} + +static ssize_t show_go_hispeed_load(struct cpufreq_interactive_tunables + *tunables, char *buf) +{ + return sprintf(buf, "%lu\n", tunables->go_hispeed_load); +} + +static ssize_t store_go_hispeed_load(struct cpufreq_interactive_tunables + *tunables, const char *buf, size_t count) +{ + int ret; + unsigned long val; + + ret = kstrtoul(buf, 0, &val); + if (ret < 0) + return ret; + tunables->go_hispeed_load = val; + return count; +} + +static ssize_t show_min_sample_time(struct cpufreq_interactive_tunables + *tunables, char *buf) +{ + return sprintf(buf, "%lu\n", tunables->min_sample_time); +} + +static ssize_t store_min_sample_time(struct cpufreq_interactive_tunables + *tunables, const char *buf, size_t count) +{ + int ret; + unsigned long val; + + ret = kstrtoul(buf, 0, &val); + if (ret < 0) + return ret; + tunables->min_sample_time = val; + return count; +} + +static ssize_t show_timer_rate(struct cpufreq_interactive_tunables *tunables, + char *buf) +{ + return sprintf(buf, "%lu\n", tunables->timer_rate); +} + +static ssize_t store_timer_rate(struct cpufreq_interactive_tunables *tunables, + const char *buf, size_t count) +{ + int ret; + unsigned long val, val_round; + + ret = kstrtoul(buf, 0, &val); + if (ret < 0) + return ret; + + val_round = jiffies_to_usecs(usecs_to_jiffies(val)); + if (val != val_round) + pr_warn("timer_rate not aligned to jiffy. Rounded up to %lu\n", + val_round); + + tunables->timer_rate = val_round; + return count; +} + +static ssize_t show_timer_slack(struct cpufreq_interactive_tunables *tunables, + char *buf) +{ + return sprintf(buf, "%d\n", tunables->timer_slack_val); +} + +static ssize_t store_timer_slack(struct cpufreq_interactive_tunables *tunables, + const char *buf, size_t count) +{ + int ret; + unsigned long val; + + ret = kstrtol(buf, 10, &val); + if (ret < 0) + return ret; + + tunables->timer_slack_val = val; + return count; +} + +static ssize_t show_boost(struct cpufreq_interactive_tunables *tunables, + char *buf) +{ + return sprintf(buf, "%d\n", tunables->boost_val); +} + +static ssize_t store_boost(struct cpufreq_interactive_tunables *tunables, + const char *buf, size_t count) +{ + int ret; + unsigned long val; + + ret = kstrtoul(buf, 0, &val); + if (ret < 0) + return ret; + + tunables->boost_val = val; + + if (tunables->boost_val) { + trace_cpufreq_interactive_boost("on"); + if (!tunables->boosted) + cpufreq_interactive_boost(tunables); + } else { + tunables->boostpulse_endtime = ktime_to_us(ktime_get()); + trace_cpufreq_interactive_unboost("off"); + } + + return count; +} + +static ssize_t store_boostpulse(struct cpufreq_interactive_tunables *tunables, + const char *buf, size_t count) +{ + int ret; + unsigned long val; + + ret = kstrtoul(buf, 0, &val); + if (ret < 0) + return ret; + + tunables->boostpulse_endtime = ktime_to_us(ktime_get()) + + tunables->boostpulse_duration_val; + trace_cpufreq_interactive_boost("pulse"); + if (!tunables->boosted) + cpufreq_interactive_boost(tunables); + return count; +} + +static ssize_t show_boostpulse_duration(struct cpufreq_interactive_tunables + *tunables, char *buf) +{ + return sprintf(buf, "%d\n", tunables->boostpulse_duration_val); +} + +static ssize_t store_boostpulse_duration(struct cpufreq_interactive_tunables + *tunables, const char *buf, size_t count) +{ + int ret; + unsigned long val; + + ret = kstrtoul(buf, 0, &val); + if (ret < 0) + return ret; + + tunables->boostpulse_duration_val = val; + return count; +} + +static ssize_t show_io_is_busy(struct cpufreq_interactive_tunables *tunables, + char *buf) +{ + return sprintf(buf, "%u\n", tunables->io_is_busy); +} + +static ssize_t store_io_is_busy(struct cpufreq_interactive_tunables *tunables, + const char *buf, size_t count) +{ + int ret; + unsigned long val; + + ret = kstrtoul(buf, 0, &val); + if (ret < 0) + return ret; + tunables->io_is_busy = val; + return count; +} + +/* + * Create show/store routines + * - sys: One governor instance for complete SYSTEM + * - pol: One governor instance per struct cpufreq_policy + */ +#define show_gov_pol_sys(file_name) \ +static ssize_t show_##file_name##_gov_sys \ +(struct kobject *kobj, struct attribute *attr, char *buf) \ +{ \ + return show_##file_name(common_tunables, buf); \ +} \ + \ +static ssize_t show_##file_name##_gov_pol \ +(struct cpufreq_policy *policy, char *buf) \ +{ \ + return show_##file_name(policy->governor_data, buf); \ +} + +#define store_gov_pol_sys(file_name) \ +static ssize_t store_##file_name##_gov_sys \ +(struct kobject *kobj, struct attribute *attr, const char *buf, \ + size_t count) \ +{ \ + return store_##file_name(common_tunables, buf, count); \ +} \ + \ +static ssize_t store_##file_name##_gov_pol \ +(struct cpufreq_policy *policy, const char *buf, size_t count) \ +{ \ + return store_##file_name(policy->governor_data, buf, count); \ +} + +#define show_store_gov_pol_sys(file_name) \ +show_gov_pol_sys(file_name); \ +store_gov_pol_sys(file_name) + +show_store_gov_pol_sys(target_loads); +show_store_gov_pol_sys(above_hispeed_delay); +show_store_gov_pol_sys(hispeed_freq); +show_store_gov_pol_sys(go_hispeed_load); +show_store_gov_pol_sys(min_sample_time); +show_store_gov_pol_sys(timer_rate); +show_store_gov_pol_sys(timer_slack); +show_store_gov_pol_sys(boost); +store_gov_pol_sys(boostpulse); +show_store_gov_pol_sys(boostpulse_duration); +show_store_gov_pol_sys(io_is_busy); + +#define gov_sys_attr_rw(_name) \ +static struct global_attr _name##_gov_sys = \ +__ATTR(_name, 0644, show_##_name##_gov_sys, store_##_name##_gov_sys) + +#define gov_pol_attr_rw(_name) \ +static struct freq_attr _name##_gov_pol = \ +__ATTR(_name, 0644, show_##_name##_gov_pol, store_##_name##_gov_pol) + +#define gov_sys_pol_attr_rw(_name) \ + gov_sys_attr_rw(_name); \ + gov_pol_attr_rw(_name) + +gov_sys_pol_attr_rw(target_loads); +gov_sys_pol_attr_rw(above_hispeed_delay); +gov_sys_pol_attr_rw(hispeed_freq); +gov_sys_pol_attr_rw(go_hispeed_load); +gov_sys_pol_attr_rw(min_sample_time); +gov_sys_pol_attr_rw(timer_rate); +gov_sys_pol_attr_rw(timer_slack); +gov_sys_pol_attr_rw(boost); +gov_sys_pol_attr_rw(boostpulse_duration); +gov_sys_pol_attr_rw(io_is_busy); + +static struct global_attr boostpulse_gov_sys = + __ATTR(boostpulse, 0200, NULL, store_boostpulse_gov_sys); + +static struct freq_attr boostpulse_gov_pol = + __ATTR(boostpulse, 0200, NULL, store_boostpulse_gov_pol); + +/* One Governor instance for entire system */ +static struct attribute *interactive_attributes_gov_sys[] = { + &target_loads_gov_sys.attr, + &above_hispeed_delay_gov_sys.attr, + &hispeed_freq_gov_sys.attr, + &go_hispeed_load_gov_sys.attr, + &min_sample_time_gov_sys.attr, + &timer_rate_gov_sys.attr, + &timer_slack_gov_sys.attr, + &boost_gov_sys.attr, + &boostpulse_gov_sys.attr, + &boostpulse_duration_gov_sys.attr, + &io_is_busy_gov_sys.attr, + NULL, +}; + +static struct attribute_group interactive_attr_group_gov_sys = { + .attrs = interactive_attributes_gov_sys, + .name = "interactive", +}; + +/* Per policy governor instance */ +static struct attribute *interactive_attributes_gov_pol[] = { + &target_loads_gov_pol.attr, + &above_hispeed_delay_gov_pol.attr, + &hispeed_freq_gov_pol.attr, + &go_hispeed_load_gov_pol.attr, + &min_sample_time_gov_pol.attr, + &timer_rate_gov_pol.attr, + &timer_slack_gov_pol.attr, + &boost_gov_pol.attr, + &boostpulse_gov_pol.attr, + &boostpulse_duration_gov_pol.attr, + &io_is_busy_gov_pol.attr, + NULL, +}; + +static struct attribute_group interactive_attr_group_gov_pol = { + .attrs = interactive_attributes_gov_pol, + .name = "interactive", +}; + +static struct attribute_group *get_sysfs_attr(void) +{ + if (have_governor_per_policy()) + return &interactive_attr_group_gov_pol; + else + return &interactive_attr_group_gov_sys; +} + +static int cpufreq_interactive_idle_notifier(struct notifier_block *nb, + unsigned long val, + void *data) +{ + if (val == IDLE_END) + cpufreq_interactive_idle_end(); + + return 0; +} + +static struct notifier_block cpufreq_interactive_idle_nb = { + .notifier_call = cpufreq_interactive_idle_notifier, +}; + +static int cpufreq_governor_interactive(struct cpufreq_policy *policy, + unsigned int event) +{ + int rc; + unsigned int j; + struct cpufreq_interactive_cpuinfo *pcpu; + struct cpufreq_frequency_table *freq_table; + struct cpufreq_interactive_tunables *tunables; + unsigned long flags; + + if (have_governor_per_policy()) + tunables = policy->governor_data; + else + tunables = common_tunables; + + WARN_ON(!tunables && (event != CPUFREQ_GOV_POLICY_INIT)); + + switch (event) { + case CPUFREQ_GOV_POLICY_INIT: + if (have_governor_per_policy()) { + WARN_ON(tunables); + } else if (tunables) { + tunables->usage_count++; + policy->governor_data = tunables; + return 0; + } + + tunables = kzalloc(sizeof(*tunables), GFP_KERNEL); + if (!tunables) { + pr_err("%s: POLICY_INIT: kzalloc failed\n", __func__); + return -ENOMEM; + } + + tunables->usage_count = 1; + tunables->above_hispeed_delay = default_above_hispeed_delay; + tunables->nabove_hispeed_delay = + ARRAY_SIZE(default_above_hispeed_delay); + tunables->go_hispeed_load = DEFAULT_GO_HISPEED_LOAD; + tunables->target_loads = default_target_loads; + tunables->ntarget_loads = ARRAY_SIZE(default_target_loads); + tunables->min_sample_time = DEFAULT_MIN_SAMPLE_TIME; + tunables->timer_rate = DEFAULT_TIMER_RATE; + tunables->boostpulse_duration_val = DEFAULT_MIN_SAMPLE_TIME; + tunables->timer_slack_val = DEFAULT_TIMER_SLACK; + + spin_lock_init(&tunables->target_loads_lock); + spin_lock_init(&tunables->above_hispeed_delay_lock); + + policy->governor_data = tunables; + if (!have_governor_per_policy()) { + common_tunables = tunables; + WARN_ON(cpufreq_get_global_kobject()); + } + + rc = sysfs_create_group(get_governor_parent_kobj(policy), + get_sysfs_attr()); + if (rc) { + kfree(tunables); + policy->governor_data = NULL; + if (!have_governor_per_policy()) { + common_tunables = NULL; + cpufreq_put_global_kobject(); + } + return rc; + } + + if (!policy->governor->initialized) { + idle_notifier_register(&cpufreq_interactive_idle_nb); + cpufreq_register_notifier(&cpufreq_notifier_block, + CPUFREQ_TRANSITION_NOTIFIER); + } + + break; + + case CPUFREQ_GOV_POLICY_EXIT: + if (!--tunables->usage_count) { + if (policy->governor->initialized == 1) { + cpufreq_unregister_notifier(&cpufreq_notifier_block, + CPUFREQ_TRANSITION_NOTIFIER); + idle_notifier_unregister(&cpufreq_interactive_idle_nb); + } + + sysfs_remove_group(get_governor_parent_kobj(policy), + get_sysfs_attr()); + + if (!have_governor_per_policy()) + cpufreq_put_global_kobject(); + + kfree(tunables); + common_tunables = NULL; + } + + policy->governor_data = NULL; + break; + + case CPUFREQ_GOV_START: + mutex_lock(&gov_lock); + + freq_table = cpufreq_frequency_get_table(policy->cpu); + if (!tunables->hispeed_freq) + tunables->hispeed_freq = policy->max; + + for_each_cpu(j, policy->cpus) { + pcpu = &per_cpu(cpuinfo, j); + pcpu->policy = policy; + pcpu->target_freq = policy->cur; + pcpu->freq_table = freq_table; + pcpu->floor_freq = pcpu->target_freq; + pcpu->pol_floor_val_time = + ktime_to_us(ktime_get()); + pcpu->loc_floor_val_time = pcpu->pol_floor_val_time; + pcpu->pol_hispeed_val_time = pcpu->pol_floor_val_time; + pcpu->loc_hispeed_val_time = pcpu->pol_floor_val_time; + down_write(&pcpu->enable_sem); + del_timer_sync(&pcpu->cpu_timer); + del_timer_sync(&pcpu->cpu_slack_timer); + cpufreq_interactive_timer_start(tunables, j); + pcpu->governor_enabled = 1; + up_write(&pcpu->enable_sem); + } + + mutex_unlock(&gov_lock); + break; + + case CPUFREQ_GOV_STOP: + mutex_lock(&gov_lock); + for_each_cpu(j, policy->cpus) { + pcpu = &per_cpu(cpuinfo, j); + down_write(&pcpu->enable_sem); + pcpu->governor_enabled = 0; + del_timer_sync(&pcpu->cpu_timer); + del_timer_sync(&pcpu->cpu_slack_timer); + up_write(&pcpu->enable_sem); + } + + mutex_unlock(&gov_lock); + break; + + case CPUFREQ_GOV_LIMITS: + if (policy->max < policy->cur) + __cpufreq_driver_target(policy, + policy->max, CPUFREQ_RELATION_H); + else if (policy->min > policy->cur) + __cpufreq_driver_target(policy, + policy->min, CPUFREQ_RELATION_L); + for_each_cpu(j, policy->cpus) { + pcpu = &per_cpu(cpuinfo, j); + + down_read(&pcpu->enable_sem); + if (pcpu->governor_enabled == 0) { + up_read(&pcpu->enable_sem); + continue; + } + + spin_lock_irqsave(&pcpu->target_freq_lock, flags); + if (policy->max < pcpu->target_freq) + pcpu->target_freq = policy->max; + else if (policy->min > pcpu->target_freq) + pcpu->target_freq = policy->min; + + spin_unlock_irqrestore(&pcpu->target_freq_lock, flags); + up_read(&pcpu->enable_sem); + } + break; + } + return 0; +} + +#ifndef CONFIG_CPU_FREQ_DEFAULT_GOV_INTERACTIVE +static +#endif +struct cpufreq_governor cpufreq_gov_interactive = { + .name = "interactive", + .governor = cpufreq_governor_interactive, + .max_transition_latency = 10000000, + .owner = THIS_MODULE, +}; + +static void cpufreq_interactive_nop_timer(unsigned long data) +{ +} + +static int __init cpufreq_interactive_init(void) +{ + unsigned int i; + struct cpufreq_interactive_cpuinfo *pcpu; + struct sched_param param = { .sched_priority = MAX_RT_PRIO-1 }; + + /* Initalize per-cpu timers */ + for_each_possible_cpu(i) { + pcpu = &per_cpu(cpuinfo, i); + init_timer_deferrable(&pcpu->cpu_timer); + pcpu->cpu_timer.function = cpufreq_interactive_timer; + pcpu->cpu_timer.data = i; + init_timer(&pcpu->cpu_slack_timer); + pcpu->cpu_slack_timer.function = cpufreq_interactive_nop_timer; + spin_lock_init(&pcpu->load_lock); + spin_lock_init(&pcpu->target_freq_lock); + init_rwsem(&pcpu->enable_sem); + } + + spin_lock_init(&speedchange_cpumask_lock); + mutex_init(&gov_lock); + speedchange_task = + kthread_create(cpufreq_interactive_speedchange_task, NULL, + "cfinteractive"); + if (IS_ERR(speedchange_task)) + return PTR_ERR(speedchange_task); + + sched_setscheduler_nocheck(speedchange_task, SCHED_FIFO, ¶m); + get_task_struct(speedchange_task); + + /* NB: wake up so the thread does not look hung to the freezer */ + wake_up_process(speedchange_task); + + return cpufreq_register_governor(&cpufreq_gov_interactive); +} + +#ifdef CONFIG_CPU_FREQ_DEFAULT_GOV_INTERACTIVE +fs_initcall(cpufreq_interactive_init); +#else +module_init(cpufreq_interactive_init); +#endif + +static void __exit cpufreq_interactive_exit(void) +{ + cpufreq_unregister_governor(&cpufreq_gov_interactive); + kthread_stop(speedchange_task); + put_task_struct(speedchange_task); +} + +module_exit(cpufreq_interactive_exit); + +MODULE_AUTHOR("Mike Chan "); +MODULE_DESCRIPTION("'cpufreq_interactive' - A cpufreq governor for " + "Latency sensitive workloads"); +MODULE_LICENSE("GPL"); diff -Naur a/drivers/cpufreq/Kconfig b/drivers/cpufreq/Kconfig --- a/drivers/cpufreq/Kconfig 2016-05-01 23:54:23.733232399 +1000 +++ b/drivers/cpufreq/Kconfig 2016-05-01 23:54:24.938251516 +1000 @@ -102,6 +102,16 @@ Be aware that not all cpufreq drivers support the conservative governor. If unsure have a look at the help section of the driver. Fallback governor will be the performance governor. + +config CPU_FREQ_DEFAULT_GOV_INTERACTIVE + bool "interactive" + select CPU_FREQ_GOV_INTERACTIVE + help + Use the CPUFreq governor 'interactive' as default. This allows + you to get a full dynamic cpu frequency capable system by simply + loading your cpufreq low-level hardware driver, using the + 'interactive' governor for latency-sensitive workloads. + endchoice config CPU_FREQ_GOV_PERFORMANCE @@ -157,6 +167,20 @@ For details, take a look at linux/Documentation/cpu-freq. + If in doubt, say N. + +config CPU_FREQ_GOV_INTERACTIVE + bool "'interactive' cpufreq policy governor" + help + 'interactive' - This driver adds a dynamic cpufreq policy governor + designed for latency-sensitive workloads. + + This governor attempts to reduce the latency of clock + increases so that the system is more responsive to + interactive workloads. + + For details, take a look at linux/Documentation/cpu-freq. + If in doubt, say N. config CPU_FREQ_GOV_CONSERVATIVE diff -Naur a/drivers/cpufreq/Makefile b/drivers/cpufreq/Makefile --- a/drivers/cpufreq/Makefile 2016-05-01 23:54:27.993299985 +1000 +++ b/drivers/cpufreq/Makefile 2016-05-01 23:54:30.633341873 +1000 @@ -10,6 +10,7 @@ obj-$(CONFIG_CPU_FREQ_GOV_USERSPACE) += cpufreq_userspace.o obj-$(CONFIG_CPU_FREQ_GOV_ONDEMAND) += cpufreq_ondemand.o obj-$(CONFIG_CPU_FREQ_GOV_CONSERVATIVE) += cpufreq_conservative.o +obj-$(CONFIG_CPU_FREQ_GOV_INTERACTIVE) += cpufreq_interactive.o obj-$(CONFIG_CPU_FREQ_GOV_COMMON) += cpufreq_governor.o obj-$(CONFIG_CPUFREQ_DT) += cpufreq-dt.o diff -Naur a/include/linux/cpufreq.h b/include/linux/cpufreq.h --- a/include/linux/cpufreq.h 2016-05-01 23:54:33.950061168 +1000 +++ b/include/linux/cpufreq.h 2016-05-01 23:54:34.855075531 +1000 @@ -487,6 +487,9 @@ #elif defined(CONFIG_CPU_FREQ_DEFAULT_GOV_CONSERVATIVE) extern struct cpufreq_governor cpufreq_gov_conservative; #define CPUFREQ_DEFAULT_GOVERNOR (&cpufreq_gov_conservative) +#elif defined(CONFIG_CPU_FREQ_DEFAULT_GOV_INTERACTIVE) +extern struct cpufreq_governor cpufreq_gov_interactive; +#define CPUFREQ_DEFAULT_GOVERNOR (&cpufreq_gov_interactive) #endif /********************************************************************* diff -Naur a/include/linux/cpu.h b/include/linux/cpu.h --- a/include/linux/cpu.h 2016-05-01 23:54:00.184525619 +1000 +++ b/include/linux/cpu.h 2016-05-01 23:54:08.216319629 +1000 @@ -290,4 +290,11 @@ bool cpu_report_death(void); #endif /* #ifdef CONFIG_HOTPLUG_CPU */ +#define IDLE_START 1 +#define IDLE_END 2 + +void idle_notifier_register(struct notifier_block *n); +void idle_notifier_unregister(struct notifier_block *n); +void idle_notifier_call_chain(unsigned long val); + #endif /* _LINUX_CPU_H_ */ diff -Naur a/include/trace/events/cpufreq_interactive.h b/include/trace/events/cpufreq_interactive.h --- a/include/trace/events/cpufreq_interactive.h 1970-01-01 10:00:00.000000000 +1000 +++ b/include/trace/events/cpufreq_interactive.h 2016-05-01 23:54:14.238081791 +1000 @@ -0,0 +1,112 @@ +#undef TRACE_SYSTEM +#define TRACE_SYSTEM cpufreq_interactive + +#if !defined(_TRACE_CPUFREQ_INTERACTIVE_H) || defined(TRACE_HEADER_MULTI_READ) +#define _TRACE_CPUFREQ_INTERACTIVE_H + +#include + +DECLARE_EVENT_CLASS(set, + TP_PROTO(u32 cpu_id, unsigned long targfreq, + unsigned long actualfreq), + TP_ARGS(cpu_id, targfreq, actualfreq), + + TP_STRUCT__entry( + __field( u32, cpu_id ) + __field(unsigned long, targfreq ) + __field(unsigned long, actualfreq ) + ), + + TP_fast_assign( + __entry->cpu_id = (u32) cpu_id; + __entry->targfreq = targfreq; + __entry->actualfreq = actualfreq; + ), + + TP_printk("cpu=%u targ=%lu actual=%lu", + __entry->cpu_id, __entry->targfreq, + __entry->actualfreq) +); + +DEFINE_EVENT(set, cpufreq_interactive_setspeed, + TP_PROTO(u32 cpu_id, unsigned long targfreq, + unsigned long actualfreq), + TP_ARGS(cpu_id, targfreq, actualfreq) +); + +DECLARE_EVENT_CLASS(loadeval, + TP_PROTO(unsigned long cpu_id, unsigned long load, + unsigned long curtarg, unsigned long curactual, + unsigned long newtarg), + TP_ARGS(cpu_id, load, curtarg, curactual, newtarg), + + TP_STRUCT__entry( + __field(unsigned long, cpu_id ) + __field(unsigned long, load ) + __field(unsigned long, curtarg ) + __field(unsigned long, curactual ) + __field(unsigned long, newtarg ) + ), + + TP_fast_assign( + __entry->cpu_id = cpu_id; + __entry->load = load; + __entry->curtarg = curtarg; + __entry->curactual = curactual; + __entry->newtarg = newtarg; + ), + + TP_printk("cpu=%lu load=%lu cur=%lu actual=%lu targ=%lu", + __entry->cpu_id, __entry->load, __entry->curtarg, + __entry->curactual, __entry->newtarg) +); + +DEFINE_EVENT(loadeval, cpufreq_interactive_target, + TP_PROTO(unsigned long cpu_id, unsigned long load, + unsigned long curtarg, unsigned long curactual, + unsigned long newtarg), + TP_ARGS(cpu_id, load, curtarg, curactual, newtarg) +); + +DEFINE_EVENT(loadeval, cpufreq_interactive_already, + TP_PROTO(unsigned long cpu_id, unsigned long load, + unsigned long curtarg, unsigned long curactual, + unsigned long newtarg), + TP_ARGS(cpu_id, load, curtarg, curactual, newtarg) +); + +DEFINE_EVENT(loadeval, cpufreq_interactive_notyet, + TP_PROTO(unsigned long cpu_id, unsigned long load, + unsigned long curtarg, unsigned long curactual, + unsigned long newtarg), + TP_ARGS(cpu_id, load, curtarg, curactual, newtarg) +); + +TRACE_EVENT(cpufreq_interactive_boost, + TP_PROTO(const char *s), + TP_ARGS(s), + TP_STRUCT__entry( + __string(s, s) + ), + TP_fast_assign( + __assign_str(s, s); + ), + TP_printk("%s", __get_str(s)) +); + +TRACE_EVENT(cpufreq_interactive_unboost, + TP_PROTO(const char *s), + TP_ARGS(s), + TP_STRUCT__entry( + __string(s, s) + ), + TP_fast_assign( + __assign_str(s, s); + ), + TP_printk("%s", __get_str(s)) +); + +#endif /* _TRACE_CPUFREQ_INTERACTIVE_H */ + +/* This part must be outside protection */ +#include diff -Naur a/kernel/cpu.c b/kernel/cpu.c --- a/kernel/cpu.c 2016-05-01 23:54:03.506244948 +1000 +++ b/kernel/cpu.c 2016-05-01 23:54:08.216319629 +1000 @@ -827,3 +827,23 @@ { cpumask_copy(to_cpumask(cpu_online_bits), src); } + +static ATOMIC_NOTIFIER_HEAD(idle_notifier); + +void idle_notifier_register(struct notifier_block *n) +{ + atomic_notifier_chain_register(&idle_notifier, n); +} +EXPORT_SYMBOL_GPL(idle_notifier_register); + +void idle_notifier_unregister(struct notifier_block *n) +{ + atomic_notifier_chain_unregister(&idle_notifier, n); +} +EXPORT_SYMBOL_GPL(idle_notifier_unregister); + +void idle_notifier_call_chain(unsigned long val) +{ + atomic_notifier_call_chain(&idle_notifier, val, NULL); +} +EXPORT_SYMBOL_GPL(idle_notifier_call_chain);