userspace governor study
一、userspace governor是一种让用户可以手动调整cpu频率的governor。在linux目录/sys/devices/system/cpu/cpu0/cpufreq/下有一个参数scaling_setspeed,它是这个governor专有的:只有使用这个governor时才能对其进行读写操作,其他governor不能。
二、下面来讲讲它实现的code。
与前面powersave governor一样,要对这个governor进行register。函数如下:
/*
 * Initialization routine (marked __init) for the userspace governor.
 *
 * Registers the cpufreq_gov_userspace descriptor and hands back the
 * result of cpufreq_register_governor() unchanged.
 */
static int __init cpufreq_gov_userspace_init(void)
{
	return cpufreq_register_governor(&cpufreq_gov_userspace);
}
直接返回注册的函数,下面看看userspace governor的cpufreq_governor的结构体是怎样定义的,即cpufreq_gov_userspace。Code如下:
/*
 * Descriptor of the "userspace" governor.
 *
 * Besides the name, event callback and owner, it wires up the two
 * setspeed callbacks: cpufreq_set() for stores and show_speed() for reads.
 */
struct cpufreq_governor cpufreq_gov_userspace = {
	.owner		= THIS_MODULE,
	.name		= "userspace",
	.governor	= cpufreq_governor_userspace,
	.show_setspeed	= show_speed,
	.store_setspeed	= cpufreq_set,
};
很明显,比powersave governor多了两个回调函数的实现:
cpufreq_set:设置用户指定的频率;
show_speed:显示该governor记录的当前频率。
这两个是userspace governor特有的。name,governor,owner是所有governor所具有的。
注册完之后,让我们进入对这个governor进行初始化的函数:cpufreq_governor_userspace。
讲解如下:
/*
 * Event callback of the userspace governor.
 *
 * @policy: policy the event applies to; policy->cpu selects the per-CPU
 *          bookkeeping slots that are updated.
 * @event:  CPUFREQ_GOV_START, CPUFREQ_GOV_STOP or CPUFREQ_GOV_LIMITS.
 *          Any other value is ignored.
 *
 * Returns -EINVAL when START is requested for a CPU that is not online,
 * 0 in every other case.
 */
static int cpufreq_governor_userspace(struct cpufreq_policy *policy,
				      unsigned int event)
{
	unsigned int cpu = policy->cpu;
	unsigned int wanted;
	unsigned int target;
	unsigned int relation;
	int rc = 0;

	if (event == CPUFREQ_GOV_START) {
		if (!cpu_online(cpu))
			return -EINVAL;
		BUG_ON(!policy->cur);

		mutex_lock(&userspace_mutex);

		/* The first CPU to use this governor registers the notifier. */
		if (cpus_using_userspace_governor == 0)
			cpufreq_register_notifier(
					&userspace_cpufreq_notifier_block,
					CPUFREQ_TRANSITION_NOTIFIER);
		cpus_using_userspace_governor++;

		/* Snapshot the policy into this CPU's bookkeeping slots. */
		per_cpu(cpu_is_managed, cpu) = 1;
		per_cpu(cpu_min_freq, cpu) = policy->min;
		per_cpu(cpu_max_freq, cpu) = policy->max;
		per_cpu(cpu_cur_freq, cpu) = policy->cur;
		per_cpu(cpu_set_freq, cpu) = policy->cur;
		pr_debug("managing cpu %u started "
			 "(%u - %u kHz, currently %u kHz)\n",
			 cpu,
			 per_cpu(cpu_min_freq, cpu),
			 per_cpu(cpu_max_freq, cpu),
			 per_cpu(cpu_cur_freq, cpu));

		mutex_unlock(&userspace_mutex);
	} else if (event == CPUFREQ_GOV_STOP) {
		mutex_lock(&userspace_mutex);

		/* The last CPU to leave unregisters the notifier again. */
		if (--cpus_using_userspace_governor == 0)
			cpufreq_unregister_notifier(
					&userspace_cpufreq_notifier_block,
					CPUFREQ_TRANSITION_NOTIFIER);

		/* cpu_cur_freq is deliberately left as it was. */
		per_cpu(cpu_is_managed, cpu) = 0;
		per_cpu(cpu_min_freq, cpu) = 0;
		per_cpu(cpu_max_freq, cpu) = 0;
		per_cpu(cpu_set_freq, cpu) = 0;
		pr_debug("managing cpu %u stopped\n", cpu);

		mutex_unlock(&userspace_mutex);
	} else if (event == CPUFREQ_GOV_LIMITS) {
		mutex_lock(&userspace_mutex);

		wanted = per_cpu(cpu_set_freq, cpu);
		pr_debug("limit event for cpu %u: %u - %u kHz, "
			 "currently %u kHz, last set to %u kHz\n",
			 cpu, policy->min, policy->max,
			 per_cpu(cpu_cur_freq, cpu),
			 wanted);

		/*
		 * Pull the last requested frequency back inside the new
		 * limits; a request that is still in range is re-applied.
		 */
		if (policy->max < wanted) {
			target = policy->max;
			relation = CPUFREQ_RELATION_H;
		} else if (policy->min > wanted) {
			target = policy->min;
			relation = CPUFREQ_RELATION_L;
		} else {
			target = wanted;
			relation = CPUFREQ_RELATION_L;
		}
		__cpufreq_driver_target(policy, target, relation);

		/* Refresh the bookkeeping only after the driver call. */
		per_cpu(cpu_min_freq, cpu) = policy->min;
		per_cpu(cpu_max_freq, cpu) = policy->max;
		per_cpu(cpu_cur_freq, cpu) = policy->cur;

		mutex_unlock(&userspace_mutex);
	}

	return rc;
}
这个函数的讲解如下:
1、event=CPUFREQ_GOV_START:cpus_using_userspace_governor记录有多少个cpu core正在使用这个governor。当它为0(即第一个cpu core开始使用该governor)时,注册一个频率转换通知链(CPUFREQ_TRANSITION_NOTIFIER),用于在频率改变之后更新记录的当前频率cpu_cur_freq。对于通知链,下面会接着讲。
2、cpus_using_userspace_governor++的意思是统计有多少个cpucore使用这个governor;
3、其实event为CPUFREQ_GOV_STOP时是对相应的event为START进行相反的操作。
4、event为LIMITS时,是对相应的频率进行调整。
/*
 * cpufreq_set - apply a frequency requested by the user
 * @policy: policy of the CPU whose frequency should change
 * @freq:   requested frequency in kHz
 *
 * The raw request is remembered in cpu_set_freq, then clamped to the
 * recorded min/max for this CPU before being passed to the driver.
 *
 * Returns -EINVAL if the CPU is not managed by this governor, otherwise
 * the return value of __cpufreq_driver_target().
 */
static int cpufreq_set(struct cpufreq_policy *policy, unsigned int freq)
{
	int ret = -EINVAL;
	unsigned int lowest;
	unsigned int highest;

	pr_debug("cpufreq_set for cpu %u, freq %u kHz\n", policy->cpu, freq);

	mutex_lock(&userspace_mutex);

	if (per_cpu(cpu_is_managed, policy->cpu)) {
		/* Keep the unclamped request. */
		per_cpu(cpu_set_freq, policy->cpu) = freq;

		lowest = per_cpu(cpu_min_freq, policy->cpu);
		highest = per_cpu(cpu_max_freq, policy->cpu);
		if (freq < lowest)
			freq = lowest;
		if (freq > highest)
			freq = highest;

		/*
		 * Holding userspace_mutex protects us against concurrent
		 * ->target() calls.  The locked variant,
		 * cpufreq_driver_target(), must not be used here because
		 * it could deadlock:
		 *
		 *   A: cpufreq_set (lock userspace_mutex) ->
		 *        cpufreq_driver_target (lock policy->lock)
		 *   B: cpufreq_set_policy (lock policy->lock) ->
		 *        __cpufreq_governor ->
		 *          cpufreq_governor_userspace (lock userspace_mutex)
		 */
		ret = __cpufreq_driver_target(policy, freq,
					      CPUFREQ_RELATION_L);
	}

	mutex_unlock(&userspace_mutex);
	return ret;
}
1、第一个if语句判断当前cpu core是否由userspace governor管理;
2、
/* Record the frequency requested by the user in this CPU's cpu_set_freq slot. */
per_cpu(cpu_set_freq,policy->cpu) = freq;
3、
/*
 * Check the requested frequency against the recorded limits: a value below
 * cpu_min_freq is raised to it, a value above cpu_max_freq is lowered to it.
 */
if(freq < per_cpu(cpu_min_freq, policy->cpu))
freq= per_cpu(cpu_min_freq, policy->cpu);
if(freq > per_cpu(cpu_max_freq, policy->cpu))
freq= per_cpu(cpu_max_freq, policy->cpu);
4、
/* Request the frequency change through __cpufreq_driver_target() and keep its result. */
ret= __cpufreq_driver_target(policy, freq, CPUFREQ_RELATION_L);
2、static ssize_t show_speed(struct cpufreq_policy *policy, char *buf)
这个函数用来显示governor记录的当前频率(cpu_cur_freq),是一个很简单的函数,如下:
/*
 * show_speed - print the frequency recorded for the policy's CPU
 * @policy: policy whose CPU is looked up
 * @buf:    output buffer supplied by the caller
 *
 * Writes the per-CPU cpu_cur_freq value as a decimal number followed by a
 * newline and returns what sprintf() returns.
 */
static ssize_t show_speed(struct cpufreq_policy *policy, char *buf)
{
	unsigned int recorded = per_cpu(cpu_cur_freq, policy->cpu);

	return sprintf(buf, "%u\n", recorded);
}
直接在sys接口中显示。
三、通知链的作用。
在governor的初始化的过程中注册了一个通知链,如下所示,是一个通知链结构体。
/* Notifier block whose callback is userspace_cpufreq_notifier(). */
static struct notifier_block userspace_cpufreq_notifier_block = {
	.notifier_call	= userspace_cpufreq_notifier,
};
.notifier_call是回调函数,当某件事情发生的时候就会触发这个函数的执行。
这个函数的具体实现如下:
/*
 * Transition notifier callback.
 *
 * @nb:   notifier block (unused)
 * @val:  transition state; only CPUFREQ_POSTCHANGE is acted upon
 * @data: pointer to the struct cpufreq_freqs describing the transition
 *
 * For a CPU managed by this governor, stores the new frequency in that
 * CPU's cpu_cur_freq slot.  Always returns 0.
 */
static int
userspace_cpufreq_notifier(struct notifier_block *nb, unsigned long val,
			   void *data)
{
	struct cpufreq_freqs *transition = data;

	if (per_cpu(cpu_is_managed, transition->cpu) &&
	    val == CPUFREQ_POSTCHANGE) {
		pr_debug("saving cpu_cur_freq of cpu %u to be %u kHz\n",
			 transition->cpu, transition->new);
		per_cpu(cpu_cur_freq, transition->cpu) = transition->new;
	}

	return 0;
}
可以看到有一个if语句的判断,如下:
/* On POSTCHANGE, store the new frequency as this CPU's cpu_cur_freq. */
if (val == CPUFREQ_POSTCHANGE) {
	pr_debug("saving cpu_cur_freq of cpu %u to be %u kHz\n",
		 freq->cpu, freq->new);
	per_cpu(cpu_cur_freq, freq->cpu) = freq->new;
}
它的意思是:当频率改变完成之后(CPUFREQ_POSTCHANGE),就把新的频率freq->new保存为该cpu的当前频率cpu_cur_freq。
对于参数 CPUFREQ_POSTCHANGE。
这个参数定义在cpufreq.h中。是cpu频率变化的转换方式,如下:
/*
 * Transition state handled first in cpufreq_notify_transition(), presumably
 * sent before the frequency is switched. adjust_jiffies() does not rescale
 * loops_per_jiffy for this state.
 */
#define CPUFREQ_PRECHANGE (0)
/*
 * Transition state carrying the new frequency. adjust_jiffies() rescales
 * loops_per_jiffy for this state when the old and new frequencies differ.
 */
#define CPUFREQ_POSTCHANGE (1)
/*
 * The next two states relate to power management (per the original note);
 * adjust_jiffies() rescales loops_per_jiffy for both of them.
 */
#define CPUFREQ_RESUMECHANGE (8)
#define CPUFREQ_SUSPENDCHANGE (9)
可以追踪到源码中,cpufreq.c中,这两个参数有什么具体的不同,即频率改变方式不同所做的处理是怎样的?尤其是这个参数loops_per_jiffy是做什么的。
/*
 * cpufreq_notify_transition - run the transition notifiers for one state
 * @freqs: transition description (cpu, old and new frequency); its flags
 *         field is overwritten with the driver flags
 * @state: CPUFREQ_PRECHANGE or CPUFREQ_POSTCHANGE; other values only get
 *         the debug message
 *
 * Must not be called with interrupts disabled (enforced by BUG_ON).
 */
void cpufreq_notify_transition(struct cpufreq_freqs *freqs, unsigned int state)
{
	struct cpufreq_policy *policy;

	BUG_ON(irqs_disabled());

	freqs->flags = cpufreq_driver->flags;
	pr_debug("notification %u of frequency transition to %u kHz\n",
		 state, freqs->new);

	policy = per_cpu(cpufreq_cpu_data, freqs->cpu);

	switch (state) {
	case CPUFREQ_PRECHANGE:
		/*
		 * If the driver reports an "old frequency" that differs
		 * from what the cpufreq core has recorded, trust the core's
		 * value.  Skipped for CPUFREQ_CONST_LOOPS drivers.
		 */
		if (!(cpufreq_driver->flags & CPUFREQ_CONST_LOOPS) &&
		    (policy) && (policy->cpu == freqs->cpu) &&
		    (policy->cur) && (policy->cur != freqs->old)) {
			pr_debug("Warning: CPU frequency is"
				 " %u, cpufreq assumed %u kHz.\n",
				 freqs->old, policy->cur);
			freqs->old = policy->cur;
		}

		/* Notifiers run first, then the jiffies adjustment. */
		srcu_notifier_call_chain(&cpufreq_transition_notifier_list,
					 CPUFREQ_PRECHANGE, freqs);
		adjust_jiffies(CPUFREQ_PRECHANGE, freqs);
		break;

	case CPUFREQ_POSTCHANGE:
		/* Here the jiffies adjustment comes before the notifiers. */
		adjust_jiffies(CPUFREQ_POSTCHANGE, freqs);
		pr_debug("FREQ: %lu - CPU: %lu", (unsigned long)freqs->new,
			 (unsigned long)freqs->cpu);
		trace_power_frequency(POWER_PSTATE, freqs->new, freqs->cpu);
		trace_cpu_frequency(freqs->new, freqs->cpu);
		srcu_notifier_call_chain(&cpufreq_transition_notifier_list,
					 CPUFREQ_POSTCHANGE, freqs);

		/* Record the new frequency as the policy's current one. */
		if (likely(policy) && likely(policy->cpu == freqs->cpu))
			policy->cur = freqs->new;
		break;
	}
}
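可以看到,CPUFREQ_PRECHANGE和CPUFREQ_POSTCHANGE两个分支最大的不同是adjust_jiffies函数与通知链调用的先后顺序:PRECHANGE先调用通知链再调用adjust_jiffies,POSTCHANGE则相反。此外,PRECHANGE会检查驱动上报的旧频率是否与cpufreq core记录的一致,POSTCHANGE会把policy->cur更新为新的频率。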
下面就来看看这个函数。
/**
 * adjust_jiffies - adjust the system "loops_per_jiffy"
 * @val: transition state (CPUFREQ_PRECHANGE, CPUFREQ_POSTCHANGE, ...)
 * @ci:  frequency transition description
 *
 * This function alters the system "loops_per_jiffy" for the clock
 * speed change. Note that loops_per_jiffy cannot be updated on SMP
 * systems as each CPU might be scaled differently. So, use the arch
 * per-CPU loops_per_jiffy value wherever possible.
 */
#ifndef CONFIG_SMP
static unsigned long l_p_j_ref;
static unsigned int l_p_j_ref_freq;

static void adjust_jiffies(unsigned long val, struct cpufreq_freqs *ci)
{
	if (ci->flags & CPUFREQ_CONST_LOOPS)
		return;

	/* The first call captures the reference value and frequency. */
	if (!l_p_j_ref_freq) {
		l_p_j_ref = loops_per_jiffy;
		l_p_j_ref_freq = ci->old;
		pr_debug("saving %lu as reference value for loops_per_jiffy; "
			 "freq is %u kHz\n", l_p_j_ref, l_p_j_ref_freq);
	}

	/*
	 * Rescale only after a real frequency change (POSTCHANGE with
	 * old != new) or on a resume/suspend change.
	 */
	if (val == CPUFREQ_POSTCHANGE) {
		if (ci->old == ci->new)
			return;
	} else if (val != CPUFREQ_RESUMECHANGE &&
		   val != CPUFREQ_SUSPENDCHANGE) {
		return;
	}

	loops_per_jiffy = cpufreq_scale(l_p_j_ref, l_p_j_ref_freq,
					ci->new);
	pr_debug("scaling loops_per_jiffy to %lu "
		 "for frequency %u kHz\n", loops_per_jiffy, ci->new);
}
#else
/* With CONFIG_SMP defined there is nothing to adjust. */
static inline void adjust_jiffies(unsigned long val, struct cpufreq_freqs *ci)
{
}
#endif
系统在启动过程中,内核会计算处理器在一个jiffy时间内运行一个内部delay循环的次数。jiffy的含义是系统定时器两个连续节拍之间的间隔。正如你所期待的那样,该计算必须根据CPU的处理速度进行校准,校准的结果存储在名为loops_per_jiffy的内核变量中。使用loops_per_jiffy的一个场合,是某个设备驱动需要进行微秒级别的短延迟的时候。具体的讲解可参考:
1、http://www.cnblogs.com/cute/archive/2011/05/09/2041468.html
2、http://linux.chinaunix.net/techdoc/net/2009/02/09/1061523.shtml
现在我们大概知道了userspace governor具体是怎么实现的了。
有些时候追本溯源是痛苦的,但是熬过之后就是彩虹了。逻辑比较混乱,有什么问题可以留言,望谅解,下班回家咯,哈哈。