How the OSQ optimizes the mutex

A mutex is similar to a binary (mutual-exclusion) semaphore. The OSQ (optimistic spin queue) can improve mutex performance; the analysis below shows how. First, a mutex in the kernel can be initialized in two ways:
Static initialization:
#define DEFINE_MUTEX(mutexname) \
	struct mutex mutexname = __MUTEX_INITIALIZER(mutexname)
where __MUTEX_INITIALIZER provides a compile-time initializer for the owner, wait_lock and wait_list fields of struct mutex.

Dynamic initialization:
#define mutex_init(mutex)						\
do {									\
	static struct lock_class_key __key;				\
									\
	__mutex_init((mutex), #mutex, &__key);				\
} while (0)

Here we take the dynamic path as the example; mutex_init() ends up calling __mutex_init():
void
__mutex_init(struct mutex *lock, const char *name, struct lock_class_key *key)
{
	atomic_long_set(&lock->owner, 0);
	spin_lock_init(&lock->wait_lock);
	INIT_LIST_HEAD(&lock->wait_list);
#ifdef CONFIG_MUTEX_SPIN_ON_OWNER
	osq_lock_init(&lock->osq);
#endif

	debug_mutex_init(lock, name, key);
}
As we can see, __mutex_init() sets up an atomic variable lock->owner, a spinlock lock->wait_lock and a list head lock->wait_list. debug_mutex_init() is used for debugging and is an empty function by default. If CONFIG_MUTEX_SPIN_ON_OWNER is enabled, an OSQ lock is initialized as well.
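As a quick illustration (a hypothetical snippet, not from the original post; my_device and my_device_create are invented names), the two initialization styles are typically used like this:

#include <linux/mutex.h>
#include <linux/slab.h>

/* Static initialization: the mutex is fully set up at compile time. */
static DEFINE_MUTEX(my_static_lock);

/*
 * Dynamic initialization: used for mutexes embedded in runtime-allocated
 * objects.
 */
struct my_device {
	struct mutex lock;
	int count;
};

static struct my_device *my_device_create(void)
{
	struct my_device *dev = kzalloc(sizeof(*dev), GFP_KERNEL);

	if (dev)
		mutex_init(&dev->lock);
	return dev;
}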

Next, let's look at how a mutex is acquired:
void __sched mutex_lock(struct mutex *lock)
{
	/* Acquiring a mutex may sleep. */
	might_sleep();
	/*
	 * The lock has a fast path and a slow path: if nobody currently owns
	 * the lock, the fast path succeeds immediately; otherwise we fall
	 * back to the slow path.
	 */
	if (!__mutex_trylock_fast(lock))
		__mutex_lock_slowpath(lock);
}
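For context, a typical critical section protected by this API looks like the following minimal sketch (counter_lock and counter_inc are invented names); because mutex_lock() may sleep, it must only be called from process context:

#include <linux/mutex.h>

static DEFINE_MUTEX(counter_lock);
static int counter;

static void counter_inc(void)
{
	/* mutex_lock() may sleep, so this must run in process context. */
	mutex_lock(&counter_lock);
	counter++;
	mutex_unlock(&counter_lock);
}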
Let's see how the fast path acquires the mutex:
static __always_inline bool __mutex_trylock_fast(struct mutex *lock)
{
	unsigned long curr = (unsigned long)current;
	/*
	 * Compare lock->owner against 0: if it is 0, atomically store curr
	 * into lock->owner and return the old value (0), so the if condition
	 * below holds and we return true.
	 */
	if (!atomic_long_cmpxchg_acquire(&lock->owner, 0UL, curr))
		return true;

	return false;
}
So the fast path is simply: if lock->owner is zero (unowned), claim the mutex with a single cmpxchg that records the current task as the owner.
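To make the cmpxchg fast path concrete, here is a minimal userspace analogy using C11 atomics; this is a sketch, not the kernel implementation, and toy_mutex/toy_trylock_fast are invented names. owner == 0 means unlocked, and one compare-and-exchange claims the lock by storing the caller's identity:

#include <stdatomic.h>
#include <stdbool.h>

struct toy_mutex {
	/*
	 * 0 = unlocked, otherwise an opaque owner id (the kernel stores the
	 * owning task_struct pointer plus some flag bits here).
	 */
	atomic_ulong owner;
};

static bool toy_trylock_fast(struct toy_mutex *lock, unsigned long self)
{
	unsigned long expected = 0;

	/* Succeeds only if owner was 0; on success owner becomes 'self'. */
	return atomic_compare_exchange_strong_explicit(&lock->owner,
						       &expected, self,
						       memory_order_acquire,
						       memory_order_relaxed);
}

static void toy_unlock_fast(struct toy_mutex *lock)
{
	atomic_store_explicit(&lock->owner, 0, memory_order_release);
}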
Next, let's look at how the slow path acquires the mutex:
static noinline void __sched
__mutex_lock_slowpath(struct mutex *lock)
{
	__mutex_lock(lock, TASK_UNINTERRUPTIBLE, 0, NULL, _RET_IP_);
}
static int __sched
__mutex_lock(struct mutex *lock, long state, unsigned int subclass,
	     struct lockdep_map *nest_lock, unsigned long ip)
{
	return __mutex_lock_common(lock, state, subclass, nest_lock, ip, NULL, false);
}
__mutex_lock_common() is a very long function; we only look at the OSQ-related part:
static __always_inline int __sched
__mutex_lock_common(struct mutex *lock, long state, unsigned int subclass,
		    struct lockdep_map *nest_lock, unsigned long ip,
		    struct ww_acquire_ctx *ww_ctx, const bool use_ww_ctx)
{
	/*
	 * Check once more whether the lock can be taken directly; if not,
	 * try to obtain it by optimistic spinning on the OSQ.
	 */
	if (__mutex_trylock(lock) ||
	    mutex_optimistic_spin(lock, ww_ctx, use_ww_ctx, NULL)) {
		/* got the lock, yay! */
		lock_acquired(&lock->dep_map, ip);
		if (use_ww_ctx && ww_ctx)
			ww_mutex_set_context_fastpath(ww, ww_ctx);
		preempt_enable();
		return 0;
	}
	/* ... remainder of the slow path (wait_list handling) omitted ... */
}
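Before diving into the OSQ code itself, the shape of this decision can be seen in a self-contained toy sketch (userspace C11 atomics, invented names, an analogy rather than the kernel algorithm): try the lock cheaply, then spin for a bounded time, and only then give up the CPU (the kernel instead puts the task to sleep on lock->wait_list):

#include <sched.h>
#include <stdatomic.h>
#include <stdbool.h>

/* Hypothetical toy lock: 0 = free, 1 = held. */
static atomic_int toy_owner;

static bool toy_trylock(void)
{
	int expected = 0;

	return atomic_compare_exchange_strong(&toy_owner, &expected, 1);
}

/*
 * Mirrors the ordering of the slow path: trylock, then a bounded optimistic
 * spin, and only then give up the CPU (the kernel sleeps on wait_list; here
 * we merely yield).
 */
static void toy_lock(void)
{
	int spins = 1000;	/* arbitrary spin budget */

	while (!toy_trylock()) {
		if (spins-- > 0)
			continue;	/* "optimistic spinning" */
		sched_yield();		/* fall back to giving up the CPU */
	}
}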
The optimistic-spin (OSQ) path is implemented by mutex_optimistic_spin():

static __always_inline bool
mutex_optimistic_spin(struct mutex *lock, struct ww_acquire_ctx *ww_ctx,
		      const bool use_ww_ctx, struct mutex_waiter *waiter)
{
	if (!waiter) {
		/*
		 * The purpose of the mutex_can_spin_on_owner() function is
		 * to eliminate the overhead of osq_lock() and osq_unlock()
		 * in case spinning isn't possible. As a waiter-spinner
		 * is not going to take OSQ lock anyway, there is no need
		 * to call mutex_can_spin_on_owner().
		 */
		/*
		 * Only spin if the current lock owner is actually running on
		 * a CPU (i.e. executing inside its critical section); if the
		 * owner has been scheduled out, spinning is pointless.
		 */
		if (!mutex_can_spin_on_owner(lock))
			goto fail;

		/*
		 * In order to avoid a stampede of mutex spinners trying to
		 * acquire the mutex all at once, the spinners need to take a
		 * MCS (queued) lock first before spinning on the owner field.
		 */
		/*
		 * Take the OSQ (MCS-style queued) lock; only the OSQ holder
		 * spins on the mutex owner field, so waiters do not all
		 * hammer the same cache line.
		 */
		if (!osq_lock(&lock->osq))
			goto fail;
	}

	for (;;) {
		struct task_struct *owner;

		/* Try to acquire the mutex... */
		/* Try to acquire the mutex; a NULL owner means we now own it. */
		owner = __mutex_trylock_or_owner(lock);
		if (!owner)
			break;

		/*
		 * There's an owner, wait for it to either
		 * release the lock or go to sleep.
		 */
		/* mutex_spin_on_owner() returns false when we should stop spinning. */
		if (!mutex_spin_on_owner(lock, owner, ww_ctx, waiter))
			goto fail_unlock;

		/*
		 * The cpu_relax() call is a compiler barrier which forces
		 * everything in this loop to be re-loaded. We don't need
		 * memory barriers as we'll eventually observe the right
		 * values at the cost of a few extra spins.
		 */
		cpu_relax();
	}
	/* Acquired the mutex: release the OSQ and report success. */
	if (!waiter)
		osq_unlock(&lock->osq);

	return true;


fail_unlock:
	if (!waiter)
		osq_unlock(&lock->osq);

fail:
	/*
	 * If we fell out of the spin path because of need_resched(),
	 * reschedule now, before we try-lock the mutex. This avoids getting
	 * scheduled out right after we obtained the mutex.
	 */
	/*
	 * Optimistic spinning failed. If a reschedule is pending, yield the
	 * CPU now and return false so that the caller falls back to the
	 * sleeping slow path.
	 */
	if (need_resched()) {
		/*
		 * We _should_ have TASK_RUNNING here, but just in case
		 * we do not, make it so, otherwise we might get stuck.
		 */
		__set_current_state(TASK_RUNNING);
		schedule_preempt_disabled();
	}

	return false;
}
So in the mutex slow path, when CONFIG_MUTEX_SPIN_ON_OWNER is enabled, the OSQ lets a waiter spin for a while (as long as the lock owner is still running on a CPU) instead of sleeping immediately, which improves mutex performance when critical sections are short.
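The reason the OSQ helps is that each spinner busy-waits on its own queue node rather than all CPUs hammering the same owner field. Below is a very simplified userspace sketch of an MCS-style queued lock in the same spirit (C11 atomics, invented names; the kernel's osq_lock() is per-CPU and can additionally unqueue a spinner when need_resched() is set, which is why mutex_optimistic_spin() can fail and fall back to sleeping):

#include <stdatomic.h>
#include <stdbool.h>
#include <stddef.h>

struct mcs_node {
	struct mcs_node *_Atomic next;
	atomic_bool locked;
};

struct mcs_lock {
	struct mcs_node *_Atomic tail;
};

static void mcs_lock_acquire(struct mcs_lock *lock, struct mcs_node *node)
{
	struct mcs_node *prev;

	atomic_store_explicit(&node->next, NULL, memory_order_relaxed);
	atomic_store_explicit(&node->locked, false, memory_order_relaxed);

	/* Join the tail of the queue. */
	prev = atomic_exchange_explicit(&lock->tail, node, memory_order_acq_rel);
	if (!prev)
		return;		/* Queue was empty: we own the lock. */

	/*
	 * Link behind our predecessor, then spin on our *own* node, so each
	 * waiter spins on a different cache line.
	 */
	atomic_store_explicit(&prev->next, node, memory_order_release);
	while (!atomic_load_explicit(&node->locked, memory_order_acquire))
		;
}

static void mcs_lock_release(struct mcs_lock *lock, struct mcs_node *node)
{
	struct mcs_node *next = atomic_load_explicit(&node->next, memory_order_acquire);

	if (!next) {
		/* No visible successor: try to swing tail back to NULL. */
		struct mcs_node *expected = node;

		if (atomic_compare_exchange_strong_explicit(&lock->tail, &expected,
							    NULL, memory_order_acq_rel,
							    memory_order_acquire))
			return;
		/* A successor is enqueueing; wait for it to link itself. */
		while (!(next = atomic_load_explicit(&node->next, memory_order_acquire)))
			;
	}
	atomic_store_explicit(&next->locked, true, memory_order_release);
}

Each waiter spins only on its own node->locked, so cache-line ping-pong is limited to the hand-off between a waiter and its successor.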

Reposted from blog.csdn.net/tiantao2012/article/details/80577451