Linux-do_fork()函数及其他重要函数简介

前言

内核版本2.6.11.1,每个内核版本都会有所差异。在代码中已添加注释所以就没更多的文字说明。

1、do_fork()

do_fork()函数负责处理clone()、fork()、vfork()系统调用。下面是ARM体系结构下这三个系统调用的入口函数:

/*
 * sys_fork - fork() system call entry (ARM).
 * @regs: saved user-mode registers of the caller.
 *
 * Reached indirectly through a small wrapper.  The child is created by
 * do_fork() with SIGCHLD as its only clone flag and starts on the same
 * user stack pointer the parent had on entry.  No TID pointers are passed.
 *
 * Returns whatever do_fork() returns.
 */
asmlinkage int sys_fork(struct pt_regs *regs)
{
	/* The child reuses the caller's current user stack pointer. */
	unsigned long child_sp = regs->ARM_sp;

	return do_fork(SIGCHLD, child_sp, regs, 0, NULL, NULL);
}

/*
 * sys_clone - clone() system call entry (ARM).
 * @clone_flags:   clone flags, forwarded unchanged to do_fork().
 * @newsp:         user stack pointer for the child; 0 means "use the
 *                 caller's current stack pointer".
 * @parent_tidptr: forwarded unchanged to do_fork().
 * @tls_val:       not used by this function.
 * @child_tidptr:  forwarded unchanged to do_fork().
 * @regs:          saved user-mode registers of the caller.
 *
 * Reached indirectly through a small wrapper.
 *
 * Returns whatever do_fork() returns.
 */
asmlinkage int sys_clone(unsigned long clone_flags, unsigned long newsp,
			 int __user *parent_tidptr, int tls_val,
			 int __user *child_tidptr, struct pt_regs *regs)
{
	/* Fall back to the caller's stack when no explicit stack was given. */
	unsigned long child_sp = newsp ? newsp : regs->ARM_sp;

	return do_fork(clone_flags, child_sp, regs, 0,
		       parent_tidptr, child_tidptr);
}

/*
 * sys_vfork - vfork() system call entry (ARM).
 * @regs: saved user-mode registers of the caller.
 *
 * Asks do_fork() for a child created with CLONE_VFORK | CLONE_VM and
 * SIGCHLD, running on the caller's current user stack pointer.  No TID
 * pointers are passed.
 *
 * Returns whatever do_fork() returns.
 */
asmlinkage int sys_vfork(struct pt_regs *regs)
{
	const unsigned long vfork_flags = CLONE_VFORK | CLONE_VM | SIGCHLD;

	return do_fork(vfork_flags, regs->ARM_sp, regs, 0, NULL, NULL);
}

这三个创建进程的函数最终都会调用到do_fork()。

do_fork()函数原型如下:

/*
 *  Ok, this is the main fork-routine.
 *
 * It copies the process, and if successful kick-starts
 * it and waits for it to finish using the VM if required.
 */
long do_fork(unsigned long clone_flags,
	      unsigned long stack_start,
	      struct pt_regs *regs,
	      unsigned long stack_size,
	      int __user *parent_tidptr,
	      int __user *child_tidptr)


参数列表如下:

参数 描述
clone_flags 低字节指定了进程结束时发送到父进程的信号代码,通常选择SIGCHLD信号。剩余的3个字节给clone标志组用于编码。
stack_start 表示子进程的用户态堆栈指针,该值会被赋给子进程的栈指针寄存器(x86上为esp;本文示例的ARM入口函数中传入的是regs->ARM_sp)。
regs 指向通用寄存器的指针,通用寄存器的值是在用户态切换到内核态时被保存到内核堆栈中的
stack_size 未使用(总被设置为0)
parent_tidptr 表示父进程的一个用户态变量的地址,该变量用于存放新轻量级进程的 PID,只有在CLONE_PARENT_SETTID标志被设置时才有意义
child_tidptr 表示新轻量级进程的一个用户态变量的地址,该变量用于存放这个新进程自身的 PID。只有在CLONE_CHILD_SETTID标志被设置时才有意义

2、do_fork()的处理流程

/*
 *  Ok, this is the main fork-routine.
 *
 * It copies the process, and if successful kick-starts
 * it and waits for it to finish using the VM if required.
 *
 * Parameters are forwarded to copy_process() together with a freshly
 * allocated PID.  clone_flags is additionally inspected here for
 * CLONE_VFORK and CLONE_STOPPED, and may gain CLONE_PTRACE when the
 * caller is being traced.
 *
 * Returns the child's PID on success, -EAGAIN when no PID could be
 * allocated, or the error code carried by copy_process()'s ERR_PTR
 * result (in which case the PID is released again).
 */
long do_fork(unsigned long clone_flags,
	      unsigned long stack_start,
	      struct pt_regs *regs,
	      unsigned long stack_size,
	      int __user *parent_tidptr,
	      int __user *child_tidptr)
{
    
    
	struct task_struct *p;
	int trace = 0;
	/* Reserve a PID for the child up front (presumably taken from the
	 * pidmap_array bitmap -- see alloc_pidmap()). */
	long pid = alloc_pidmap();

	if (pid < 0)
		return -EAGAIN;
		/* If the caller is being ptraced (current->ptrace is non-zero),
		 * ask fork_traceflag() which ptrace event, if any, applies to
		 * this clone.  When one applies, force CLONE_PTRACE into
		 * clone_flags.  fork_traceflag() returns 0 when
		 * CLONE_UNTRACED is set, so such clones are left alone.
		*/
	if (unlikely(current->ptrace)) {
    
    
		trace = fork_traceflag (clone_flags);
		if (trace)
			clone_flags |= CLONE_PTRACE;
	}
	/* Build the child.  On success this yields the new task_struct;
	 * on failure an ERR_PTR-encoded error. */
	p = copy_process(clone_flags, stack_start, regs, stack_size, parent_tidptr, child_tidptr, pid);
	/*
	 * Do this prior waking up the new thread - the thread pointer
	 * might get invalid after that point, if the thread exits quickly.
	 */
	if (!IS_ERR(p)) {
    
    
		struct completion vfork;

		/* For vfork, hand the child an on-stack completion that the
		 * parent waits on further down. */
		if (clone_flags & CLONE_VFORK) {
    
    
			p->vfork_done = &vfork;
			init_completion(&vfork);
		}
		/*
		 * If the child is marked as traced (PT_PTRACED in p->ptrace)
		 * or CLONE_STOPPED was requested, queue a SIGSTOP for it so
		 * that it stops as soon as it starts.
		 */
		if ((p->ptrace & PT_PTRACED) || (clone_flags & CLONE_STOPPED)) {
    
    
			/*
			 * We'll start up with an immediate SIGSTOP.
			 */
			/* Add SIGSTOP to the child's pending signal set. */
			sigaddset(&p->pending.signal, SIGSTOP);
			/* Flag the child as having a signal pending so the
			 * SIGSTOP is noticed when it starts. */
			set_tsk_thread_flag(p, TIF_SIGPENDING);
		}

		if (!(clone_flags & CLONE_STOPPED))
			wake_up_new_task(p, clone_flags); /* make the child runnable (presumably queues it on a CPU runqueue) */
		else
			/*
			* With CLONE_STOPPED the child is not woken here;
			* it is left in TASK_STOPPED state instead.
			*/
			p->state = TASK_STOPPED;

		/* If a ptrace event applies, publish the child's PID in
		 * current->ptrace_message and call ptrace_notify() with the
		 * event code in bits 8 and up and SIGTRAP in the low byte.
		 * Presumably this stops the current task and notifies its
		 * tracer, which can then read the PID from ptrace_message --
		 * see ptrace_notify().
		 */
		if (unlikely (trace)) {
    
    
			current->ptrace_message = pid;
			ptrace_notify ((trace << 8) | SIGTRAP);
		}

		/*
		 * With CLONE_VFORK (i.e. a vfork() call) the parent blocks
		 * here on the completion handed to the child above.
		 * Presumably the child completes it when it execs or exits;
		 * the signalling side is not part of this function.
		 * Afterwards, a tracer that asked for it
		 * (PT_TRACE_VFORK_DONE) gets a PTRACE_EVENT_VFORK_DONE
		 * notification.
		 */
		if (clone_flags & CLONE_VFORK) {
    
    
			wait_for_completion(&vfork);
			if (unlikely (current->ptrace & PT_TRACE_VFORK_DONE))
				ptrace_notify ((PTRACE_EVENT_VFORK_DONE << 8) | SIGTRAP);
		}
	} else {
    
    
		/* copy_process() failed: give the PID back and return the
		 * error code instead. */
		free_pidmap(pid);
		pid = PTR_ERR(p);
	}
	/* Return the child's PID, or the error code on failure. */
	return pid;
}

3、do_fork中其他重要函数

3.1、fork_traceflag

/*
 * fork_traceflag - pick the ptrace event that applies to a clone request.
 * @clone_flags: flags of the clone being performed.
 *
 * The kind of clone is classified from @clone_flags, then the matching
 * PT_TRACE_* bit in current->ptrace decides whether an event is reported:
 *
 *   CLONE_UNTRACED set              -> always 0
 *   CLONE_VFORK set                 -> PTRACE_EVENT_VFORK if PT_TRACE_VFORK
 *   exit signal is not SIGCHLD      -> PTRACE_EVENT_CLONE if PT_TRACE_CLONE
 *   otherwise (plain fork)          -> PTRACE_EVENT_FORK  if PT_TRACE_FORK
 *
 * Returns the PTRACE_EVENT_* code, or 0 when no event applies.
 */
static inline int fork_traceflag (unsigned clone_flags)
{
	/* CLONE_UNTRACED overrides every tracing request. */
	if (clone_flags & CLONE_UNTRACED)
		return 0;

	/* vfork-style clone. */
	if (clone_flags & CLONE_VFORK)
		return (current->ptrace & PT_TRACE_VFORK) ?
			PTRACE_EVENT_VFORK : 0;

	/* Exit signal (the CSIGNAL bits) other than SIGCHLD: a clone. */
	if ((clone_flags & CSIGNAL) != SIGCHLD)
		return (current->ptrace & PT_TRACE_CLONE) ?
			PTRACE_EVENT_CLONE : 0;

	/* Everything else is treated as a plain fork. */
	return (current->ptrace & PT_TRACE_FORK) ? PTRACE_EVENT_FORK : 0;
}

3.2、copy_process

/*
 * This creates a new process as a copy of the old one,
 * but does not actually start it yet.
 *
 * It copies the registers, and all the appropriate
 * parts of the process environment (as per the clone
 * flags). The actual kick-off is left to the caller.
 *
 * @pid is the PID already reserved by the caller; it is stored in the
 * new task but never released here.
 *
 * Returns the new task_struct on success or an ERR_PTR-encoded error
 * code on failure.  On failure, everything acquired so far is unwound
 * through the bad_fork_* label ladder at the end of the function.
 */
static task_t *copy_process(unsigned long clone_flags,
				 unsigned long stack_start,
				 struct pt_regs *regs,
				 unsigned long stack_size,
				 int __user *parent_tidptr,
				 int __user *child_tidptr,
				 int pid)
{
    
    
	int retval;
	struct task_struct *p = NULL;

	/* Flag sanity check: CLONE_NEWNS together with CLONE_FS is
	 * rejected with -EINVAL. */
	if ((clone_flags & (CLONE_NEWNS|CLONE_FS)) == (CLONE_NEWNS|CLONE_FS))
		return ERR_PTR(-EINVAL);

	/*
	 * Thread groups must share signals as well, and detached threads
	 * can only be started up within the thread group.
	 */
	/* CLONE_THREAD without CLONE_SIGHAND is rejected with -EINVAL. */
	if ((clone_flags & CLONE_THREAD) && !(clone_flags & CLONE_SIGHAND))
		return ERR_PTR(-EINVAL);

	/*
	 * Shared signal handlers imply shared VM. By way of the above,
	 * thread groups also imply shared VM. Blocking this case allows
	 * for various simplifications in other code.
	 */
	/* CLONE_SIGHAND without CLONE_VM is rejected with -EINVAL. */
	if ((clone_flags & CLONE_SIGHAND) && !(clone_flags & CLONE_VM))
		return ERR_PTR(-EINVAL);

	/* Security hook; a non-zero result aborts the fork with that value.
	 * NOTE(review): the original annotation says this hook is an empty
	 * function -- presumably only without a security module; confirm. */
	retval = security_task_create(clone_flags);
	if (retval)
		goto fork_out;

	retval = -ENOMEM;
	/* Allocate the child's task descriptor as a copy of current. */
	p = dup_task_struct(current);
	if (!p)
		goto fork_out;

	retval = -EAGAIN;
	/* Per-user process limit.  If the owning user's process count has
	 * reached the RLIMIT_NPROC soft limit (rlim_cur), the fork fails
	 * with -EAGAIN unless the caller has CAP_SYS_ADMIN or
	 * CAP_SYS_RESOURCE, or the user is root_user.
	*/
	if (atomic_read(&p->user->processes) >=
			p->signal->rlim[RLIMIT_NPROC].rlim_cur) {
    
    
		if (!capable(CAP_SYS_ADMIN) && !capable(CAP_SYS_RESOURCE) &&
				p->user != &root_user)
			goto bad_fork_free;
	}
	/* Bump the user_struct's usage counter. */
	atomic_inc(&p->user->__count);
	/* Bump the count of processes owned by this user. */
	atomic_inc(&p->user->processes);
	get_group_info(p->group_info);

	/*
	 * If multiple threads are within copy_process(), then this check
	 * triggers too late. This doesn't hurt, the check is only there
	 * to stop root fork bombs.
	 */
	/* Refuse when the system-wide thread count has reached max_threads. */
	if (nr_threads >= max_threads)
		goto bad_fork_cleanup_count;

	if (!try_module_get(p->thread_info->exec_domain->module))
		goto bad_fork_cleanup_count;

	if (p->binfmt && !try_module_get(p->binfmt->module))
		goto bad_fork_cleanup_put_domain;

	/* Start the child with did_exec cleared. */
	p->did_exec = 0;

	/* Derive the child's flags (and ptrace state) from clone_flags. */
	copy_flags(clone_flags, p);

	/* Store the PID reserved by the caller in the new task. */
	p->pid = pid;
	retval = -EFAULT;

	/* With CLONE_PARENT_SETTID, write the child's PID to the user
	 * variable parent_tidptr points at; a failed write aborts the fork
	 * with -EFAULT. */
	if (clone_flags & CLONE_PARENT_SETTID)
		if (put_user(p->pid, parent_tidptr))
			goto bad_fork_cleanup;

	p->proc_dentry = NULL;

	/* Initialise the child's children/sibling list heads and its two
	 * spinlocks; it has no vfork completion yet. */
	INIT_LIST_HEAD(&p->children);
	INIT_LIST_HEAD(&p->sibling);
	p->vfork_done = NULL;
	spin_lock_init(&p->alloc_lock);
	spin_lock_init(&p->proc_lock);

	clear_tsk_thread_flag(p, TIF_SIGPENDING);

	/* Reset the child's private pending-signal state. */
	init_sigpending(&p->pending);

	p->it_real_value = 0;
	p->it_real_incr = 0;
	p->it_virt_value = cputime_zero;
	p->it_virt_incr = cputime_zero;
	p->it_prof_value = cputime_zero;
	p->it_prof_incr = cputime_zero;

	/* Initialise the real timer; its data word is the task pointer. */
	init_timer(&p->real_timer);
	p->real_timer.data = (unsigned long) p;

	p->utime = cputime_zero;
	p->stime = cputime_zero;
	p->rchar = 0;		/* I/O counter: bytes read */
	p->wchar = 0;		/* I/O counter: bytes written */
	p->syscr = 0;		/* I/O counter: read syscalls */
	p->syscw = 0;		/* I/O counter: write syscalls */
	acct_clear_integrals(p);

	p->lock_depth = -1;		/* -1 = no lock */
	do_posix_clock_monotonic_gettime(&p->start_time);
	p->security = NULL;
	p->io_context = NULL;
	p->io_wait = NULL;
	p->audit_context = NULL;
#ifdef CONFIG_NUMA
 	p->mempolicy = mpol_copy(p->mempolicy);
 	if (IS_ERR(p->mempolicy)) {
    
    
 		retval = PTR_ERR(p->mempolicy);
 		p->mempolicy = NULL;
 		goto bad_fork_cleanup;
 	}
#endif

	/* A new thread (CLONE_THREAD) joins the caller's thread group;
	 * otherwise the child's tgid is its own PID. */
	p->tgid = p->pid;
	if (clone_flags & CLONE_THREAD)
		p->tgid = current->tgid;
	if ((retval = security_task_alloc(p)))
		goto bad_fork_cleanup_policy;
	if ((retval = audit_alloc(p)))
		goto bad_fork_cleanup_security;
	/* copy all the process information */
	if ((retval = copy_semundo(clone_flags, p)))
		goto bad_fork_cleanup_audit;
	if ((retval = copy_files(clone_flags, p)))
		goto bad_fork_cleanup_semundo;
	if ((retval = copy_fs(clone_flags, p)))
		goto bad_fork_cleanup_files;
	if ((retval = copy_sighand(clone_flags, p)))
		goto bad_fork_cleanup_fs;
	if ((retval = copy_signal(clone_flags, p)))
		goto bad_fork_cleanup_sighand;
	if ((retval = copy_mm(clone_flags, p)))
		goto bad_fork_cleanup_signal;
	if ((retval = copy_keys(clone_flags, p)))
		goto bad_fork_cleanup_mm;
	if ((retval = copy_namespace(clone_flags, p)))
		goto bad_fork_cleanup_keys;
	/* Per-thread setup from regs and the stack parameters; presumably
	 * architecture-specific -- see copy_thread(). */
	retval = copy_thread(0, clone_flags, stack_start, stack_size, p, regs);
	if (retval)
		goto bad_fork_cleanup_namespace;

	/* With CLONE_CHILD_SETTID, remember child_tidptr in
	 * p->set_child_tid; otherwise NULL. */
	p->set_child_tid = (clone_flags & CLONE_CHILD_SETTID) ? child_tidptr : NULL;
	/*
	 * Clear TID on mm_release()?
	 */
	/* With CLONE_CHILD_CLEARTID, remember child_tidptr in
	 * p->clear_child_tid; otherwise NULL. */
	p->clear_child_tid = (clone_flags & CLONE_CHILD_CLEARTID) ? child_tidptr: NULL;

	/*
	 * Syscall tracing should be turned off in the child regardless
	 * of CLONE_PTRACE.
	 */
	/* Clear TIF_SYSCALL_TRACE in the child's thread flags. */
	clear_tsk_thread_flag(p, TIF_SYSCALL_TRACE);

	/* Our parent execution domain becomes current domain
	   These must match for thread signalling to apply */
	   
	p->parent_exec_id = p->self_exec_id;

	/* ok, now we should be set up.. */
	/* Threads (CLONE_THREAD) get exit_signal -1; other children use the
	 * signal number from the CSIGNAL bits of clone_flags. */
	p->exit_signal = (clone_flags & CLONE_THREAD) ? -1 : (clone_flags & CSIGNAL);
	p->pdeath_signal = 0;
	p->exit_state = 0;

	/* Perform scheduler related setup */
	/* NOTE(review): per the original annotation sched_fork() also sets
	 * the new task's state to TASK_RUNNING -- confirm in sched_fork(). */
	sched_fork(p);

	/*
	 * Ok, make it visible to the rest of the system.
	 * We dont wake it up yet.
	 */
	p->group_leader = p;
	INIT_LIST_HEAD(&p->ptrace_children);
	INIT_LIST_HEAD(&p->ptrace_list);

	/* Need tasklist lock for parent etc handling! */
	write_lock_irq(&tasklist_lock);

	/*
	 * The task hasn't been attached yet, so cpus_allowed mask cannot
	 * have changed. The cpus_allowed mask of the parent may have
	 * changed after it was copied first time, and it may then move to
	 * another CPU - so we re-copy it here and set the child's CPU to
	 * the parent's CPU. This avoids alot of nasty races.
	 */
	p->cpus_allowed = current->cpus_allowed;
	/* Place the child on the CPU this code is currently running on. */
	set_task_cpu(p, smp_processor_id());

	/*
	 * Check for pending SIGKILL! The new thread should not be allowed
	 * to slip out of an OOM kill. (or normal SIGKILL.)
	 */
	if (sigismember(&current->pending.signal, SIGKILL)) {
    
    
		write_unlock_irq(&tasklist_lock);
		retval = -EINTR;
		goto bad_fork_cleanup_namespace;
	}

	/* CLONE_PARENT re-uses the old parent */
	/* If either CLONE_PARENT or CLONE_THREAD is set, the child's
	 * real_parent is the caller's real_parent; otherwise it is the
	 * caller itself.
	*/
	if (clone_flags & (CLONE_PARENT|CLONE_THREAD))
		p->real_parent = current->real_parent;
	else
		p->real_parent = current;
	p->parent = p->real_parent;

	if (clone_flags & CLONE_THREAD) {
    
    
		spin_lock(&current->sighand->siglock);
		/*
		 * Important: if an exit-all has been started then
		 * do not create this new thread - the whole thread
		 * group is supposed to exit anyway.
		 */
		if (current->signal->flags & SIGNAL_GROUP_EXIT) {
    
    
			spin_unlock(&current->sighand->siglock);
			write_unlock_irq(&tasklist_lock);
			retval = -EAGAIN;
			goto bad_fork_cleanup_namespace;
		}
		p->group_leader = current->group_leader;

		if (current->signal->group_stop_count > 0) {
    
    
			/*
			 * There is an all-stop in progress for the group.
			 * We ourselves will stop as soon as we check signals.
			 * Make the new thread part of that group stop too.
			 */
			current->signal->group_stop_count++;
			set_tsk_thread_flag(p, TIF_SIGPENDING);
		}

		spin_unlock(&current->sighand->siglock);
	}
	/* Link the new task into the process lists (see SET_LINKS). */
	SET_LINKS(p);
	/* If the child is marked PT_PTRACED it must be traced: link it via
	 * __ptrace_link() with the caller's parent as its ptrace parent. */
	if (unlikely(p->ptrace & PT_PTRACED))
		__ptrace_link(p, current->parent);

	/* Register the child under its PID and TGID; a thread-group leader
	 * is also registered under its process group and session.
	 * Presumably these go into the PID hash tables -- see attach_pid(). */
	attach_pid(p, PIDTYPE_PID, p->pid);
	attach_pid(p, PIDTYPE_TGID, p->tgid);
	if (thread_group_leader(p)) {
    
    
		attach_pid(p, PIDTYPE_PGID, process_group(p));
		attach_pid(p, PIDTYPE_SID, p->signal->session);
		if (p->pid)
			__get_cpu_var(process_counts)++;
	}
	/* The new task is now part of the system: count it. */
	nr_threads++;
	/* Count one more fork in total_forks. */
	total_forks++;

	write_unlock_irq(&tasklist_lock);
	retval = 0;
/* Common exit: the new task on success, ERR_PTR(retval) on failure. */
fork_out:
	if (retval)
		return ERR_PTR(retval);
	return p;

/* Error unwinding: each label undoes one setup step and falls through to
 * the labels below it, in reverse order of acquisition. */
bad_fork_cleanup_namespace:
	exit_namespace(p);
bad_fork_cleanup_keys:
	exit_keys(p);
bad_fork_cleanup_mm:
	if (p->mm)
		mmput(p->mm);
bad_fork_cleanup_signal:
	exit_signal(p);
bad_fork_cleanup_sighand:
	exit_sighand(p);
bad_fork_cleanup_fs:
	exit_fs(p); /* blocking */
bad_fork_cleanup_files:
	exit_files(p); /* blocking */
bad_fork_cleanup_semundo:
	exit_sem(p);
bad_fork_cleanup_audit:
	audit_free(p);
bad_fork_cleanup_security:
	security_task_free(p);
bad_fork_cleanup_policy:
#ifdef CONFIG_NUMA
	mpol_free(p->mempolicy);
#endif
bad_fork_cleanup:
	if (p->binfmt)
		module_put(p->binfmt->module);
bad_fork_cleanup_put_domain:
	module_put(p->thread_info->exec_domain->module);
bad_fork_cleanup_count:
	put_group_info(p->group_info);
	atomic_dec(&p->user->processes);
	free_uid(p->user);
bad_fork_free:
	free_task(p);
	goto fork_out;
}

3.3、dup_task_struct

/*
 * dup_task_struct - allocate a task_struct/thread_info pair cloned from @orig.
 * @orig: task to copy.
 *
 * Both structures start out as plain structure copies of @orig's; the
 * only fields fixed up afterwards are the cross pointers between the two
 * new objects and the usage counter.
 *
 * Returns the new task_struct, or NULL if either allocation fails (a
 * task_struct that was already allocated is released again).
 */
static struct task_struct *dup_task_struct(struct task_struct *orig)
{
	struct task_struct *child;
	struct thread_info *child_ti;

	/* Hook run on the source task before it is copied. */
	prepare_to_copy(orig);

	child = alloc_task_struct();
	if (!child)
		goto fail;

	child_ti = alloc_thread_info(child);
	if (!child_ti)
		goto fail_free_task;

	/* Start from verbatim copies of the parent's structures ... */
	*child_ti = *orig->thread_info;
	*child = *orig;

	/* ... then make the two new objects point at each other. */
	child->thread_info = child_ti;
	child_ti->task = child;

	/* One for us, one for whoever does the "release_task()" (usually parent) */
	atomic_set(&child->usage, 2);

	return child;

fail_free_task:
	free_task_struct(child);
fail:
	return NULL;
}

3.4、 copy_flags

/*
 * copy_flags - set up the flags and ptrace state of a freshly copied task.
 * @clone_flags: flags of the clone being performed.
 * @p:           the new task.
 *
 * The task's flags are kept as they are, except that PF_SUPERPRIV is
 * cleared and PF_FORKNOEXEC is set.  Unless CLONE_PTRACE was requested,
 * the task's ptrace state is reset to 0.
 */
static inline void copy_flags(unsigned long clone_flags, struct task_struct *p)
{
	/* Without CLONE_PTRACE the new task does not keep any ptrace state. */
	if (!(clone_flags & CLONE_PTRACE))
		p->ptrace = 0;

	/* Drop PF_SUPERPRIV, add PF_FORKNOEXEC, keep everything else. */
	p->flags = (p->flags & ~PF_SUPERPRIV) | PF_FORKNOEXEC;
}

猜你喜欢

转载自blog.csdn.net/weixin_45309916/article/details/127006981