操作系统之内核栈的切换实验

实验内容

现在的Linux 0.11采用TSS和一条指令就能完成任务切换，虽然简单，但这条指令的执行时间却很长，在实现任务切换时大概需要 200 多个时钟周期。而通过堆栈实现任务切换可能要更快，而且采用堆栈的切换还可以使用指令流水的并行优化技术，同时又使得CPU的设计变得简单。所以无论是 Linux 还是 Windows，进程/线程的切换都没有使用 Intel 提供的这种TSS切换手段，而都是通过堆栈实现的。本次实践项目就是将Linux 0.11中采用的TSS切换部分去掉，取而代之的是基于堆栈的切换程序。具体地说，就是将Linux 0.11中的switch_to实现去掉，写成一段基于堆栈切换的代码。

编写switch_to函数

由于涉及到堆栈、ldt地址空间、pcb等切换内容，这些需要精细的操作，所以使用汇编代码编写这段程序。

# Structure offsets used by switch_to. They are defined here so the routine
# is self-contained; GNU as allows a symbol to be re-assigned with '='.
ESP0 = 4		# offset of esp0 inside the TSS
KERNEL_STACK = 12	# offset of krnstack inside struct task_struct

.globl switch_to	# called from C as switch_to(pnext, _LDT(next))
.align 2
# switch_to(next, ldt): switch from the current task to `next` by swapping
# kernel stacks instead of using the hardware TSS switch.
#   8(%ebp)  = pointer to the next task's PCB (struct task_struct *)
#   12(%ebp) = value to load into LDTR for the next task
# ebp/ecx/ebx/eax are saved on the outgoing task's kernel stack and popped
# from the incoming task's kernel stack, so the push and pop order below must
# match the frame that copy_process builds for a new task.
switch_to:
	pushl %ebp
	movl %esp,%ebp		# set up a frame so the arguments are at 8/12(%ebp)
	pushl %ecx		# save ecx (restored after the switch)
	pushl %ebx		# save ebx
	pushl %eax		# save eax
	movl 8(%ebp),%ebx	# ebx = next PCB pointer
	cmpl %ebx,current	# next == current? then there is nothing to switch
	je 1f

	# Switch the PCB: current = next, eax = previous current.
	movl %ebx,%eax
	xchgl %eax,current

	# Point esp0 in the TSS at the top of the 4096-byte page that starts at
	# the next task's PCB (the same address copy_process uses as stack base).
	movl tss,%ecx
	addl $4096,%ebx
	movl %ebx,ESP0(%ecx)

	# Switch kernel stacks: store esp in the old PCB, load it from the new one.
	movl %esp,KERNEL_STACK(%eax)
	movl 8(%ebp),%ebx	# reload next PCB pointer (ebx was modified above)
	movl KERNEL_STACK(%ebx),%esp

	# Switch the LDT to the one belonging to the next task.
	movl 12(%ebp),%ecx
	lldt %cx

	# Reload fs with selector 0x17 after the LDT change
	# (NOTE(review): presumably to refresh the cached descriptor - confirm).
	movl $0x17,%ecx
	mov %cx,%fs

	# Together with the clts below, this handles the math coprocessor:
	# clts runs only when the previous task is last_task_used_math.
	cmpl %eax,last_task_used_math
	jne 1f
	clts
1:
	popl %eax		# restore registers from the (possibly new) kernel stack
	popl %ebx
	popl %ecx
	popl %ebp
	ret			# for a newly forked task this returns to first_return_from_kernel

修改schedule函数

// linux-0.11/kernel/sched.c
	// old code: only the index of the chosen task is remembered
	...
	if ((*p)->state == TASK_RUNNING && (*p)->counter > c)
		c = (*p)->counter, next = i;
	....
		switch_to(next);

	// new code: also remember the chosen task's PCB pointer, because the
	// stack-based switch_to takes the PCB pointer plus the task's LDT value
	...
	if ((*p)->state == TASK_RUNNING && (*p)->counter > c)
		c = (*p)->counter, next = i, pnext = *p;
	....
		switch_to(pnext, _LDT(next));

这里使用switch_to还需要在sched.c的前面声明一下：

/*
 * Stack-based task switch, implemented in assembly.
 * p is the PCB of the task to switch to; address is the value loaded
 * into LDTR for that task (schedule() passes _LDT(next)).
 */
extern long switch_to(struct task_struct *p, unsigned long address);

修改PCB结构体

因为现在采用了堆栈方式切换，所以pcb结构体需要携带当前进程的栈顶指针krnstack。

	...
	// linux-0.11/include/linux/sched.h
	long priority;
	long krnstack;  /* saved kernel stack pointer of this task (4th field, offset 12) */
	long signal;
	...

因为加在了pcb结构体中第4个位置，所以偏移量为12，那么它后续的变量地址也相应地偏移4个字节。这样的话结构体中的一些汇编硬编码的值也要跟着改变，下面是调整后的汇编硬编码：

// linux-0.11/kernel/system_call.s
state	= 0		# these are offsets into the task-struct.
counter	= 4
priority = 8
krnstack = 12		# new field: saved kernel stack pointer
signal	= 16		# moved up by 4 bytes because of krnstack
sigaction = 20		# was 16 before krnstack was inserted; the "16" in the old comment is the length of a sigaction entry
blocked = (33*16 + 4)	# old value 33*16 plus the 4 bytes added by krnstack

修改fork函数

在copy_process函数中加入下面这段代码，目的是复制当前内核栈的所有信息到下个进程的内核栈中，最后将内核栈指针设置到栈顶处：

	// linux-0.11/kernel/fork.c
	...
	/*
	 * Hand-build the new task's kernel stack so that the first switch_to()
	 * into it can pop its way back to user mode. Values are pushed in the
	 * exact reverse of the order in which they will be popped.
	 */
	long *krnstack = 0;
	/* The stack starts at the top of the page that holds the new PCB p. */
	krnstack = (long *)(PAGE_SIZE + (long)p);
	/* Frame consumed by the iret at the end of first_return_from_kernel. */
	*(--krnstack) = ss & 0xffff;
	*(--krnstack) = esp;
	*(--krnstack) = eflags;
	*(--krnstack) = cs & 0xffff;
	*(--krnstack) = eip;

	/* Segment registers, popped by first_return_from_kernel as gs, fs, es, ds. */
	*(--krnstack) = ds & 0xffff;
	*(--krnstack) = es & 0xffff;
	*(--krnstack) = fs & 0xffff;
	*(--krnstack) = gs & 0xffff;
	
	/* General registers, popped by first_return_from_kernel as edx, edi, esi. */
	*(--krnstack) = esi;
	*(--krnstack) = edi;
	*(--krnstack) = edx;

	/* Return address taken by the ret at the end of switch_to. */
	*(--krnstack) = (long)first_return_from_kernel;
	
	/* Registers popped by the tail of switch_to as eax, ebx, ecx, ebp. */
	*(--krnstack) = ebp;
	*(--krnstack) = ecx;
	*(--krnstack) = ebx;
	*(--krnstack) = 0;  /* eax: the new task resumes with eax == 0 */

	/* Record the resulting stack top in the PCB; switch_to loads esp from it. */
	p->krnstack = (long)krnstack;
	...

first_return_from_kernel其实不是变量，而是一段汇编代码的符号（标号）。switch_to切换了pcb、也就是切换了内核栈以后，会执行末尾的一段出栈代码：依次弹出eax、ebx、ecx、ebp，然后执行ret。这个ret弹出的返回地址正是first_return_from_kernel，于是接着执行这个符号下的代码：

// linux-0.11/kernel/system_call.s
.globl first_return_from_kernel	# its address is stored by copy_process() in fork.c
.align 2
# Entry point reached by the ret at the end of switch_to the first time a
# newly forked task is scheduled. It pops the registers that copy_process
# placed on the new kernel stack, in reverse push order, then irets using the
# eip/cs/eflags/esp/ss frame that copy_process pushed first.
first_return_from_kernel:
	popl %edx
	popl %edi
	popl %esi

	pop %gs
	pop %fs
	pop %es
	pop %ds
	iret

这段代码紧接在switch_to末尾的ret指令之后执行。可以看到这里的代码全是出栈操作，弹出的正是当初copy_process压入的内容。执行完这些出栈之后，就由iret返回用户态了，所以这个符号的名字取作“首先从内核返回”。

修改init_task

pcb结构体改变后，由这个结构体定义的变量init_task的初始值当然也要跟着更新：

// linux-0.11/include/linux/sched.h
// old code
/* state etc */	{ 0,15,15, \
// new code: the added 4th initializer is krnstack, set to PAGE_SIZE past the start of init_task
/* state etc */	{ 0,15,15, PAGE_SIZE+(long)&init_task, \

将栈指针初始化为栈底指针，因为这个时候还没有数据压栈，所以，栈顶指针与栈底指针相等。

注意schedule函数

 	// linux-0.11/kernel/sched.c
	...
	struct task_struct ** p;
	// pnext must start out pointing at the initial task's PCB.
	// NOTE(review): presumably this is the fallback used when the selection
	// loop picks no other task - confirm against the rest of schedule().
	struct task_struct * pnext = &(init_task.task);
	...

别忘了初始化pnext时，一定要赋值初始化任务的指针哈，不然，系统是无法跑起来的。我在这里卡了快1天了。

实验结果

在这里插入图片描述
可以正常启动了。

总结

这个实验最难的是内核栈的复制部分和switch_to部分，需要好好琢磨。