userspace governor study

userspace governor study

一、userspace governor是一种允许用户手动调整cpu频率的governor。在linux目录/sys/devices/system/cpu/cpu0/cpufreq/下有一个参数scaling_setspeed,它是这个governor专有的:其他governor不能对其进行读写操作,只有这个governor才能这样做。

二、下面来讲讲它实现的code

与前面powersave governor一样,要对这个governor进行register。函数如下:

/*
 * Module init hook for the userspace governor.
 *
 * Passes cpufreq_gov_userspace to cpufreq_register_governor() and returns
 * that call's result unchanged.
 */
static int __init cpufreq_gov_userspace_init(void)
{
	return cpufreq_register_governor(&cpufreq_gov_userspace);
}

直接返回注册函数的结果。下面看看userspace governor的cpufreq_governor结构体是怎样定义的,即cpufreq_gov_userspace,code如下:

/*
 * Descriptor for the "userspace" governor. This is the object passed to
 * cpufreq_register_governor() by cpufreq_gov_userspace_init().
 */
struct cpufreq_governor cpufreq_gov_userspace = {
	.name		= "userspace",
	/* Event handler: deals with CPUFREQ_GOV_START/STOP/LIMITS. */
	.governor	= cpufreq_governor_userspace,
	/* Setter callback: stores the requested frequency and applies it. */
	.store_setspeed	= cpufreq_set,
	/* Getter callback: prints the per-CPU cpu_cur_freq value. */
	.show_setspeed	= show_speed,
	.owner		= THIS_MODULE,
};

很明显,比powersave governor多了两个回调函数的实现:

cpufreq_set:用户设置的频率;

show_speed:显示用户设置的频率。

这两个是userspace governor特有的。name、governor、owner是所有governor都具有的。

注册完之后,让我们进入对这个governor进行初始化的函数:cpufreq_governor_userspace

讲解如下:

static int cpufreq_governor_userspace(struct cpufreq_policy *policy,
				   unsigned int event)
{
	unsigned int cpu = policy->cpu;
	int rc = 0;

	switch (event) {
	case CPUFREQ_GOV_START:
		if (!cpu_online(cpu))
			return -EINVAL;
		BUG_ON(!policy->cur);
		mutex_lock(&userspace_mutex);

		if (cpus_using_userspace_governor == 0) {
			cpufreq_register_notifier(
					&userspace_cpufreq_notifier_block,
					CPUFREQ_TRANSITION_NOTIFIER);
		}
		cpus_using_userspace_governor++;

		per_cpu(cpu_is_managed, cpu) = 1;
		per_cpu(cpu_min_freq, cpu) = policy->min;
		per_cpu(cpu_max_freq, cpu) = policy->max;
		per_cpu(cpu_cur_freq, cpu) = policy->cur;
		per_cpu(cpu_set_freq, cpu) = policy->cur;
		pr_debug("managing cpu %u started "
			"(%u - %u kHz, currently %u kHz)\n",
				cpu,
				per_cpu(cpu_min_freq, cpu),
				per_cpu(cpu_max_freq, cpu),
				per_cpu(cpu_cur_freq, cpu));

		mutex_unlock(&userspace_mutex);
		break;
	case CPUFREQ_GOV_STOP:
		mutex_lock(&userspace_mutex);
		cpus_using_userspace_governor--;
		if (cpus_using_userspace_governor == 0) {
			cpufreq_unregister_notifier(
					&userspace_cpufreq_notifier_block,
					CPUFREQ_TRANSITION_NOTIFIER);
		}

		per_cpu(cpu_is_managed, cpu) = 0;
		per_cpu(cpu_min_freq, cpu) = 0;
		per_cpu(cpu_max_freq, cpu) = 0;
		per_cpu(cpu_set_freq, cpu) = 0;
		pr_debug("managing cpu %u stopped\n", cpu);
		mutex_unlock(&userspace_mutex);
		break;
	case CPUFREQ_GOV_LIMITS:
		mutex_lock(&userspace_mutex);
		pr_debug("limit event for cpu %u: %u - %u kHz, "
			"currently %u kHz, last set to %u kHz\n",
			cpu, policy->min, policy->max,
			per_cpu(cpu_cur_freq, cpu),
			per_cpu(cpu_set_freq, cpu));
		if (policy->max < per_cpu(cpu_set_freq, cpu)) {
			__cpufreq_driver_target(policy, policy->max,
						CPUFREQ_RELATION_H);
		} else if (policy->min > per_cpu(cpu_set_freq, cpu)) {
			__cpufreq_driver_target(policy, policy->min,
						CPUFREQ_RELATION_L);
		} else {
			__cpufreq_driver_target(policy,
						per_cpu(cpu_set_freq, cpu),
						CPUFREQ_RELATION_L);
		}
		per_cpu(cpu_min_freq, cpu) = policy->min;
		per_cpu(cpu_max_freq, cpu) = policy->max;
		per_cpu(cpu_cur_freq, cpu) = policy->cur;
		mutex_unlock(&userspace_mutex);
		break;
	}
	return rc;
}
这个函数的讲解如下:

1、event=CPUFREQ_GOV_START时:cpus_using_userspace_governor用来统计有多少个cpu core使用了这个governor,开始的时候为0;当它为0时注册一个通知链,目的是在频率切换完成后收到通知,把新的频率记录为该cpu的当前频率。对于通知链,下面会接着讲。

2、cpus_using_userspace_governor++的意思是统计有多少个cpucore使用这个governor;

3、其实event为CPUFREQ_GOV_STOP时是对相应的event为START进行相反的操作。

4、event为LIMITS时,是对相应的频率进行调整。

/*
 * cpufreq_set - apply a user-requested frequency to a managed CPU
 * @policy: policy whose CPU should change speed
 * @freq:   requested frequency in kHz
 *
 * The raw request is remembered in cpu_set_freq; the value actually handed
 * to the driver is clamped to the cached [cpu_min_freq, cpu_max_freq].
 *
 * Returns -EINVAL if the CPU is not managed by this governor, otherwise
 * the result of __cpufreq_driver_target().
 */
static int cpufreq_set(struct cpufreq_policy *policy, unsigned int freq)
{
	const unsigned int cpu = policy->cpu;
	unsigned int target = freq;
	int ret = -EINVAL;

	pr_debug("cpufreq_set for cpu %u, freq %u kHz\n", policy->cpu, freq);

	mutex_lock(&userspace_mutex);
	if (per_cpu(cpu_is_managed, cpu)) {
		/* Store the unclamped request before limiting it. */
		per_cpu(cpu_set_freq, cpu) = freq;

		if (target < per_cpu(cpu_min_freq, cpu))
			target = per_cpu(cpu_min_freq, cpu);
		if (target > per_cpu(cpu_max_freq, cpu))
			target = per_cpu(cpu_max_freq, cpu);

		/*
		 * Holding userspace_mutex keeps concurrent ->target() calls
		 * out. The double-underscore variant is used on purpose:
		 * cpufreq_driver_target would take policy->lock and could
		 * deadlock against the opposite lock order:
		 * A: cpufreq_set (lock userspace_mutex) ->
		 *      cpufreq_driver_target(lock policy->lock)
		 * B: cpufreq_set_policy(lock policy->lock) ->
		 *      __cpufreq_governor ->
		 *         cpufreq_governor_userspace (lock userspace_mutex)
		 */
		ret = __cpufreq_driver_target(policy, target,
					      CPUFREQ_RELATION_L);
	}
	mutex_unlock(&userspace_mutex);

	return ret;
}


1、第一个if语句判断当前cpu core使用的是否是userspace governor。

2、

/*将用户设置的frequency写入相应的cpucore*/

per_cpu(cpu_set_freq,policy->cpu) = freq;

3、

/*用户设置的频率值是否合法,否则进行相应的设置。*/

if(freq < per_cpu(cpu_min_freq, policy->cpu))

      freq= per_cpu(cpu_min_freq, policy->cpu);

if(freq > per_cpu(cpu_max_freq, policy->cpu))

      freq= per_cpu(cpu_max_freq, policy->cpu);



4、

/*执行频率的改变,通知core层。*/

ret= __cpufreq_driver_target(policy, freq, CPUFREQ_RELATION_L);


2、static ssize_t show_speed(struct cpufreq_policy *policy, char *buf)

这个函数用来显示用户设置的频率,是一个很简单的函数,如下:

/*
 * show_speed - print the recorded current frequency of policy->cpu
 * @policy: policy whose CPU is queried
 * @buf:    output buffer; receives the value as decimal text plus newline
 *
 * Returns the character count reported by sprintf().
 */
static ssize_t show_speed(struct cpufreq_policy *policy, char *buf)
{
	const unsigned int cur_khz = per_cpu(cpu_cur_freq, policy->cpu);

	return sprintf(buf, "%u\n", cur_khz);
}

直接在sys接口中显示。


三、通知链的作用。

governor的初始化的过程中注册了一个通知链,如下所示,是一个通知链结构体。


/*
 * Notifier block that cpufreq_governor_userspace() registers on (and later
 * unregisters from) the CPUFREQ_TRANSITION_NOTIFIER chain. Its callback is
 * userspace_cpufreq_notifier().
 */
static struct notifier_block userspace_cpufreq_notifier_block = {
	.notifier_call  = userspace_cpufreq_notifier
};

.notifier_call是回调函数,当某件事情发生的时候就会触发这个函数的执行。

这个函数的具体实现如下:

/*
 * userspace_cpufreq_notifier - track the current frequency of managed CPUs
 * @nb:   notifier block this callback was registered with (unused)
 * @val:  transition state; only CPUFREQ_POSTCHANGE is acted upon
 * @data: pointer to the struct cpufreq_freqs describing the transition
 *
 * After a completed change on a CPU managed by this governor, the new
 * frequency is copied into that CPU's cpu_cur_freq. Always returns 0.
 */
static int
userspace_cpufreq_notifier(struct notifier_block *nb, unsigned long val,
	void *data)
{
	struct cpufreq_freqs *transition = data;

	/* Nothing to record for unmanaged CPUs or non-POSTCHANGE events. */
	if (!per_cpu(cpu_is_managed, transition->cpu) ||
	    val != CPUFREQ_POSTCHANGE)
		return 0;

	pr_debug("saving cpu_cur_freq of cpu %u to be %u kHz\n",
			transition->cpu, transition->new);
	per_cpu(cpu_cur_freq, transition->cpu) = transition->new;

	return 0;
}

可以看到有一个if语句的判断,如下:

if(val == CPUFREQ_POSTCHANGE) {

       pr_debug("saving cpu_cur_freq of cpu %u to be %u kHz\n",

           freq->cpu,freq->new);

       per_cpu(cpu_cur_freq,freq->cpu) = freq->new;

}

它的真实意思是:当频率改变完成(val为CPUFREQ_POSTCHANGE)时,就把新的频率freq->new保存为该cpu的当前频率cpu_cur_freq。

对于参数 CPUFREQ_POSTCHANGE

这个参数定义在cpufreq.h中。是cpu频率变化的转换方式,如下:

/*
 * Pre-change state. In cpufreq_notify_transition() the transition notifier
 * chain is called first and adjust_jiffies() afterwards for this state.
 */

#define CPUFREQ_PRECHANGE (0)

/*
 * Post-change state. In cpufreq_notify_transition() adjust_jiffies() runs
 * first (rescaling loops_per_jiffy when old != new), then the notifier
 * chain is called and policy->cur is updated to the new frequency.
 */

#define CPUFREQ_POSTCHANGE (1)

/*
 * The next two states are power-management related (resume / suspend).
 * adjust_jiffies() rescales loops_per_jiffy for both of them.
 */

#define CPUFREQ_RESUMECHANGE (8)

#define CPUFREQ_SUSPENDCHANGE (9)


可以追踪到源码中,cpufreq.c中,这两个参数有什么具体的不同,即频率改变方式不同所做的处理是怎样的?尤其是这个参数loops_per_jiffy是做什么的。

/*
 * cpufreq_notify_transition - announce a frequency transition
 * @freqs: transition description (cpu, old and new frequency); its flags
 *         field is overwritten with the driver's flags
 * @state: CPUFREQ_PRECHANGE or CPUFREQ_POSTCHANGE; other values only get
 *         the flags update and the debug message
 *
 * Must not be called with interrupts disabled (enforced by BUG_ON).
 */
void cpufreq_notify_transition(struct cpufreq_freqs *freqs, unsigned int state)
{
	struct cpufreq_policy *policy;

	BUG_ON(irqs_disabled());

	freqs->flags = cpufreq_driver->flags;
	pr_debug("notification %u of frequency transition to %u kHz\n",
		state, freqs->new);

	policy = per_cpu(cpufreq_cpu_data, freqs->cpu);

	if (state == CPUFREQ_PRECHANGE) {
		/*
		 * If the driver's idea of the "old frequency" disagrees
		 * with what the core has recorded in policy->cur, trust the
		 * core's value. Skipped for CPUFREQ_CONST_LOOPS drivers.
		 */
		if (!(cpufreq_driver->flags & CPUFREQ_CONST_LOOPS) &&
		    (policy) && (policy->cpu == freqs->cpu) &&
		    (policy->cur) && (policy->cur != freqs->old)) {
			pr_debug("Warning: CPU frequency is"
				" %u, cpufreq assumed %u kHz.\n",
				freqs->old, policy->cur);
			freqs->old = policy->cur;
		}
		/* Listeners are told first, loops_per_jiffy handled after. */
		srcu_notifier_call_chain(&cpufreq_transition_notifier_list,
				CPUFREQ_PRECHANGE, freqs);
		adjust_jiffies(CPUFREQ_PRECHANGE, freqs);
		return;
	}

	if (state == CPUFREQ_POSTCHANGE) {
		/* Here loops_per_jiffy is handled before listeners run. */
		adjust_jiffies(CPUFREQ_POSTCHANGE, freqs);
		pr_debug("FREQ: %lu - CPU: %lu", (unsigned long)freqs->new,
			(unsigned long)freqs->cpu);
		trace_power_frequency(POWER_PSTATE, freqs->new, freqs->cpu);
		trace_cpu_frequency(freqs->new, freqs->cpu);
		srcu_notifier_call_chain(&cpufreq_transition_notifier_list,
				CPUFREQ_POSTCHANGE, freqs);
		/* Finally record the new speed as the policy's current one. */
		if (likely(policy) && likely(policy->cpu == freqs->cpu))
			policy->cur = freqs->new;
	}
}

可以看到参数CPUFREQ_POSTCHANGE与CPUFREQ_PRECHANGE最大的不同仅仅是adjust_jiffies函数的调用位置不一样,其他的就是判断调整频率是否合法。

下面就来看看这个函数。


/**
 * adjust_jiffies - adjust the system "loops_per_jiffy"
 * @val: transition state (CPUFREQ_POSTCHANGE, CPUFREQ_RESUMECHANGE, ...)
 * @ci:  frequency transition being processed
 *
 * Rescales the system-wide "loops_per_jiffy" to follow a clock speed
 * change. On SMP systems loops_per_jiffy cannot be updated this way because
 * each CPU might be scaled differently, so the SMP variant is a no-op; use
 * the arch per-CPU loops_per_jiffy value wherever possible.
 */
#ifndef CONFIG_SMP
static unsigned long l_p_j_ref;
static unsigned int  l_p_j_ref_freq;

static void adjust_jiffies(unsigned long val, struct cpufreq_freqs *ci)
{
	int rescale;

	/* Drivers flagged CPUFREQ_CONST_LOOPS never get rescaled. */
	if (ci->flags & CPUFREQ_CONST_LOOPS)
		return;

	/* First call: remember the reference loops_per_jiffy/frequency pair. */
	if (l_p_j_ref_freq == 0) {
		l_p_j_ref = loops_per_jiffy;
		l_p_j_ref_freq = ci->old;
		pr_debug("saving %lu as reference value for loops_per_jiffy; "
			"freq is %u kHz\n", l_p_j_ref, l_p_j_ref_freq);
	}

	switch (val) {
	case CPUFREQ_POSTCHANGE:
		/* Only a real change of frequency needs a new value. */
		rescale = (ci->old != ci->new);
		break;
	case CPUFREQ_RESUMECHANGE:
	case CPUFREQ_SUSPENDCHANGE:
		rescale = 1;
		break;
	default:
		rescale = 0;
		break;
	}
	if (!rescale)
		return;

	loops_per_jiffy = cpufreq_scale(l_p_j_ref, l_p_j_ref_freq,
							ci->new);
	pr_debug("scaling loops_per_jiffy to %lu "
		"for frequency %u kHz\n", loops_per_jiffy, ci->new);
}
#else
static inline void adjust_jiffies(unsigned long val, struct cpufreq_freqs *ci)
{
	/* Intentionally empty on SMP. */
}
#endif

系统在启动过程中,内核会计算处理器在一个jiffy时间内运行一个内部的delay循环的次数。jiffy的含义是系统定时器2个连续的节拍之间的间隔。正如你所期待的那样,该计算必须被校准到你的CPU的处理速度。校准的结果被存储在称为loops_per_jiffy的内核变量中。使用loops_per_jiffy的一个场合是某设备驱动希望进行小的微秒级别的延迟的时候。具体的讲解可参考:

1、http://www.cnblogs.com/cute/archive/2011/05/09/2041468.html

2、http://linux.chinaunix.net/techdoc/net/2009/02/09/1061523.shtml

现在我们大概知道了userspace governor具体是怎么实现的了。

有些时候追本溯源是痛苦的,但是熬过之后就是彩虹了。逻辑比较混乱,有什么问题可以留言,望谅解,下班回家咯,哈哈。




猜你喜欢

转载自blog.csdn.net/wuming_422103632/article/details/17028245