1. CreateContainer 函数
接收到 GRPC 消息为 CreateContainerRequest 请求
func (a *agentGRPC) CreateContainer(ctx context.Context, req *pb.CreateContainerRequest) (resp *gpb.Empty, err error) {
if err := a.createContainerChecks(req); err != nil {
return emptyResp, err
}
// re-scan PCI bus
// looking for hidden devices
if err = rescanPciBus(); err != nil {
agentLog.WithError(err).Warn("Could not rescan PCI bus")
}
中间略过各种配置转换
2. finishCreateContainer 函数
func (a *agentGRPC) finishCreateContainer(ctr *container, req *pb.CreateContainerRequest, config *configs.Config) (resp *gpb.Empty, err error) {
containerPath := filepath.Join(libcontainerPath, a.sandbox.id)
factory, err := libcontainer.New(containerPath, libcontainer.Cgroupfs)
if err != nil {
return emptyResp, err
}
2.1 libcontainer.New 函数
看起来是不是很熟悉?kata agent 确实直接复用了 runc 的代码。路径 github.com/opencontainers/runc/libcontainer/factory_linux.go,LinuxFactory 实现了 Factory 接口
// New builds a Linux container factory anchored at the root directory.
//
// A non-empty root is created with mode 0700 when it does not exist yet. The
// factory is then populated with its default settings, Cgroupfs is applied to
// it, and finally every non-nil option func is run in the order given. The
// first option that reports an error aborts construction.
func New(root string, options ...func(*LinuxFactory) error) (Factory, error) {
	if len(root) > 0 {
		if mkErr := os.MkdirAll(root, 0700); mkErr != nil {
			return nil, newGenericError(mkErr, SystemError)
		}
	}

	factory := &LinuxFactory{
		Root:      root,
		InitPath:  "/proc/self/exe",
		InitArgs:  []string{os.Args[0], "init"},
		Validator: validate.New(),
		CriuPath:  "criu",
	}

	// Applied before the caller-supplied options so that they can override it.
	Cgroupfs(factory)

	for i := range options {
		apply := options[i]
		if apply == nil {
			// Nil options are tolerated and skipped.
			continue
		}
		if optErr := apply(factory); optErr != nil {
			return nil, optErr
		}
	}
	return factory, nil
}
2.2 factory.Create 函数
路径 github.com/opencontainers/runc/libcontainer/factory_linux.go,LinuxFactory 实现了 Create 方法
做了一大堆验证,目录创建以及权限的设置
func (l *LinuxFactory) Create(id string, config *configs.Config) (Container, error) {
if l.Root == "" {
return nil, newGenericError(fmt.Errorf("invalid root"), ConfigInvalid)
}
if err := l.validateID(id); err != nil {
return nil, err
}
2.2.1 linuxContainer 结构体实现了 Container 接口,目录为 github.com/opencontainers/runc/libcontainer/container_linux.go
c := &linuxContainer{
id: id,
root: containerRoot,
config: config,
initPath: l.InitPath,
initArgs: l.InitArgs,
criuPath: l.CriuPath,
newuidmapPath: l.NewuidmapPath,
newgidmapPath: l.NewgidmapPath,
cgroupManager: l.NewCgroupsManager(config.Cgroups, nil),
}
if intelrdt.IsCatEnabled() || intelrdt.IsMbaEnabled() {
c.intelRdtManager = l.NewIntelRdtManager(config, id, "")
}
c.state = &stoppedState{c: c}
2.3 execProcess 函数启动进程
// Shared function between CreateContainer and ExecProcess, because those expect
// a process to be run.
func (a *agentGRPC) execProcess(ctr *container, proc *process, createContainer bool) (err error) {
if ctr == nil {
return grpcStatus.Error(codes.InvalidArgument, "Container cannot be nil")
}
if proc == nil {
return grpcStatus.Error(codes.InvalidArgument, "Process cannot be nil")
}
2.3.1 执行 Start 或者 Run 方法
分别讲解
if createContainer {
err = ctr.container.Start(&proc.process)
} else {
err = ctr.container.Run(&(proc.process))
}
if err != nil {
return grpcStatus.Errorf(codes.Internal, "Could not run process: %v", err)
}
引入,runc 代码流程图
3. container.Start 函数
如果启动的是 init 进程(process.Init 为 true),会先创建 exec fifo 管道
func (c *linuxContainer) Start(process *Process) error {
c.m.Lock()
defer c.m.Unlock()
if process.Init {
if err := c.createExecFifo(); err != nil {
return err
}
}
3.1 start 函数
func (c *linuxContainer) start(process *Process) error {
parent, err := c.newParentProcess(process)
if err != nil {
return newSystemErrorWithCause(err, "creating new parent process")
}
3.1.1 newParentProcess 函数
- 创建一对pipe,parentPipe和childPipe,作为 start 进程与容器内部 init 进程通信管道
- 创建一个命令模版作为 Parent 进程启动的模板
- newInitProcess 封装 initProcess。主要工作为添加初始化类型环境变量,将namespace、uid/gid 映射等信息使用 bootstrapData 封装为一个 io.Reader
initProcess 实现了 parentProcess 接口
// newParentProcess prepares the parent-side process object used to launch p.
//
// It creates the "init" socket pair and the log pipe, builds the command via
// commandTemplate using the child ends, and then returns either a setns
// process (when p.Init is false) or an init process (after includeExecFifo
// has been applied to the command).
//
// When a step fails before the descriptors are handed over to
// newSetnsProcess or newInitProcess, every file created here is closed, so a
// failed start does not leak descriptors in the calling process.
func (c *linuxContainer) newParentProcess(p *Process) (parentProcess, error) {
	parentInitPipe, childInitPipe, err := utils.NewSockPair("init")
	if err != nil {
		return nil, newSystemErrorWithCause(err, "creating new init pipe")
	}
	messageSockPair := filePair{parentInitPipe, childInitPipe}

	parentLogPipe, childLogPipe, err := os.Pipe()
	if err != nil {
		// Only the socket pair exists at this point; release it.
		messageSockPair.parent.Close()
		messageSockPair.child.Close()
		return nil, fmt.Errorf("Unable to create the log pipe: %s", err)
	}
	logFilePair := filePair{parentLogPipe, childLogPipe}

	// releaseFiles closes both ends of both pairs. It is only used on error
	// paths where no process object has taken the pairs over yet.
	releaseFiles := func() {
		messageSockPair.parent.Close()
		messageSockPair.child.Close()
		logFilePair.parent.Close()
		logFilePair.child.Close()
	}

	cmd, err := c.commandTemplate(p, childInitPipe, childLogPipe)
	if err != nil {
		releaseFiles()
		return nil, newSystemErrorWithCause(err, "creating new command template")
	}
	if !p.Init {
		return c.newSetnsProcess(p, cmd, messageSockPair, logFilePair)
	}

	// We only set up fifoFd if we're not doing a `runc exec`. The historic
	// reason for this is that previously we would pass a dirfd that allowed
	// for container rootfs escape (and not doing it in `runc exec` avoided
	// that problem), but we no longer do that. However, there's no need to do
	// this for `runc exec` so we just keep it this way to be safe.
	if err := c.includeExecFifo(cmd); err != nil {
		releaseFiles()
		return nil, newSystemErrorWithCause(err, "including execfifo in cmd.Exec setup")
	}
	return c.newInitProcess(p, cmd, messageSockPair, logFilePair)
}
3.1.2 initProcess start 函数
创建新的进程。新进程以 /proc/self/exe 为执行入口,参数为 init。由于 Go 的 func init() 会在 main 函数之前执行,新进程会在 func init() 中直接进入容器初始化流程,而不会去执行 main 函数
func (p *initProcess) start() error {
defer p.messageSockPair.parent.Close()
err := p.cmd.Start()
p.process.ops = p
// close the write-side of the pipes (controlled by child)
p.messageSockPair.child.Close()
p.logFilePair.child.Close()
if err != nil {
p.process.ops = nil
return newSystemErrorWithCause(err, "starting init process command")
}
cmd 如最后命令所示,Path填充为 /proc/self/exe(本身 agent)。参数字段 Args 为 init,表示对容器进行初始化,调用的为 agent init
agent 最后直接复用 runc 代码
3.1.3 init 函数
// init intercepts invocations of this binary whose first argument is "init".
//
// In that case it limits GOMAXPROCS to 1, locks the goroutine to its OS
// thread, creates a libcontainer factory with an empty root and calls
// StartInitialization on it. A failure is logged; reaching the end of this
// path always panics. Any other invocation returns immediately.
func init() {
	if len(os.Args) <= 1 || os.Args[1] != "init" {
		return
	}

	runtime.GOMAXPROCS(1)
	runtime.LockOSThread()

	// The error returned by New is intentionally discarded here.
	factory, _ := libcontainer.New("")
	initErr := factory.StartInitialization()
	if initErr != nil {
		agentLog.WithError(initErr).Error("init failed")
	}
	panic("--this line should have never been executed, congratulations--")
}
4. StartInitialization
从 pipe 读取 parent 发送过来的配置,从环境变量 _LIBCONTAINER_INITPIPE 读取管道文件描述符
// StartInitialization loads a container by opening the pipe fd from the parent to read the configuration and state
// This is a low level implementation detail of the reexec and should not be consumed externally
func (l *LinuxFactory) StartInitialization() (err error) {
var (
pipefd, fifofd int
consoleSocket *os.File
envInitPipe = os.Getenv("_LIBCONTAINER_INITPIPE")
envFifoFd = os.Getenv("_LIBCONTAINER_FIFOFD")
envConsole = os.Getenv("_LIBCONTAINER_CONSOLE")
)
4.1 newContainerInit 函数
根据类型 setns 或 standard 返回对应的结构体(linuxSetnsInit 或 linuxStandardInit),二者都实现了 initer 接口的 Init 方法
// newContainerInit decodes the JSON init configuration from pipe, applies its
// Env through populateProcessEnvironment, and returns the initer matching t:
// a linuxSetnsInit for initSetns or a linuxStandardInit for initStandard.
// Any other init type yields an "unknown init type" error.
func newContainerInit(t initType, pipe *os.File, consoleSocket *os.File, fifoFd int) (initer, error) {
	var cfg *initConfig

	decoder := json.NewDecoder(pipe)
	if decodeErr := decoder.Decode(&cfg); decodeErr != nil {
		return nil, decodeErr
	}
	if envErr := populateProcessEnvironment(cfg.Env); envErr != nil {
		return nil, envErr
	}

	switch t {
	case initSetns:
		setnsInit := &linuxSetnsInit{
			pipe:          pipe,
			consoleSocket: consoleSocket,
			config:        cfg,
		}
		return setnsInit, nil
	case initStandard:
		standardInit := &linuxStandardInit{
			pipe:          pipe,
			consoleSocket: consoleSocket,
			parentPid:     unix.Getppid(),
			config:        cfg,
			fifoFd:        fifoFd,
		}
		return standardInit, nil
	default:
		return nil, fmt.Errorf("unknown init type %q", t)
	}
}
5. linuxStandardInit Init 方法
路径 github.com/opencontainers/runc/libcontainer/standard_init_linux.go
func (l *linuxStandardInit) Init() error {
runtime.LockOSThread()
defer runtime.UnlockOSThread()
- setupNetwork: 配置容器的网络,调用第三方 netlink.LinkSetup
- setupRoute: 配置容器静态路由信息,调用第三方 netlink.RouteAdd
- label.Init: 检查selinux是否被启动并将结果存入全局变量。
- finalizeNamespace: 根据config配置将需要的特权capabilities加入白名单,设置user namespace,关闭不需要的文件描述符。
- unix.Open: 以只写方式打开 fifo 管道,打开操作会一直阻塞,直到管道的另一端以读方式打开;随后写入 0,由另一端读取内容
- syscall.Exec 系统调用来执行用户所指定的在容器中运行的程序
配置 hostname、apparmor、processLabel、sysctl、readonlyPath、maskPath。create 虽然不会执行命令,但会检查命令路径,错误会在 create 期间返回
3.1.1 setupNetwork 函数
配置容器的网络,调用第三方 netlink.LinkSetup,相当于命令 ip link set $link up
如果不指定任何网络,只有loopback
// setupNetwork walks config.Networks and, for each entry, looks up the
// strategy registered for its Type and runs that strategy's initialize step.
// The first lookup or initialize error stops the walk and is returned.
func setupNetwork(config *initConfig) error {
	for _, netCfg := range config.Networks {
		strat, lookupErr := getStrategy(netCfg.Type)
		if lookupErr != nil {
			return lookupErr
		}
		if initErr := strat.initialize(netCfg); initErr != nil {
			return initErr
		}
	}
	return nil
}
3.1.2 setupRoute
配置容器静态路由信息,调用第三方 netlink.RouteAdd,相当于命令 ip route add $route
// setupRoute adds one netlink route per entry in config.Routes.
//
// For each entry the destination is parsed as a CIDR, the source and gateway
// as IP addresses, and the interface is resolved by name to obtain its link
// index. Processing stops at the first entry that fails to parse, resolve or
// be added, and that error is returned.
func setupRoute(config *configs.Config) error {
	for _, rt := range config.Routes {
		_, dstNet, parseErr := net.ParseCIDR(rt.Destination)
		if parseErr != nil {
			return parseErr
		}

		srcIP := net.ParseIP(rt.Source)
		if srcIP == nil {
			return fmt.Errorf("Invalid source for route: %s", rt.Source)
		}

		gwIP := net.ParseIP(rt.Gateway)
		if gwIP == nil {
			return fmt.Errorf("Invalid gateway for route: %s", rt.Gateway)
		}

		link, linkErr := netlink.LinkByName(rt.InterfaceName)
		if linkErr != nil {
			return linkErr
		}

		if addErr := netlink.RouteAdd(&netlink.Route{
			Scope:     netlink.SCOPE_UNIVERSE,
			Dst:       dstNet,
			Src:       srcIP,
			Gw:        gwIP,
			LinkIndex: link.Attrs().Index,
		}); addErr != nil {
			return addErr
		}
	}
	return nil
}
3.1.3 syncParentReady 函数发送 ready 到 pipe,等待父进程下发 exec 命令
// syncParentReady performs the "ready" handshake with the parent over pipe:
// it first writes a procReady sync message, then reads until the parent
// replies with procRun. A failure of either step is returned to the caller.
func syncParentReady(pipe io.ReadWriter) error {
	// Step 1: announce that init is ready to exec.
	if notifyErr := writeSync(pipe, procReady); notifyErr != nil {
		return notifyErr
	}

	// Step 2: wait for the parent's go-ahead.
	waitErr := readSync(pipe, procRun)
	return waitErr
}
3.1.4 只写方式打开fifo管道并写入0,会一直保持阻塞,直到管道的另一端以读方式打开,并读取内容
// Wait for the FIFO to be opened on the other side before exec-ing the
// user process. We open it through /proc/self/fd/$fd, because the fd that
// was given to us was an O_PATH fd to the fifo itself. Linux allows us to
// re-open an O_PATH fd through /proc.
fd, err := unix.Open(fmt.Sprintf("/proc/self/fd/%d", l.fifoFd), unix.O_WRONLY|unix.O_CLOEXEC, 0)
if err != nil {
return newSystemErrorWithCause(err, "open exec fifo")
}
3.1.5 系统调用来执行用户所指定的在容器中运行的程序
if err := syscall.Exec(name, l.config.Args[0:], os.Environ()); err != nil {
return newSystemErrorWithCause(err, "exec user process")
}
kata agent 运行在虚拟机里面,那怎么进行调试呢?
内核参数需要添加:agent.log=debug agent.debug_console,在哪里添加呢
需要修改文件 /etc/kata-containers/configuration.toml,开启 debug 模式 enable_debug = true,并在内核参数中添加 agent.debug_console
Connect to the virtual machine using the debug console
$ id=$(sudo docker ps -q --no-trunc)
$ console="/var/run/vc/vm/${id}/console.sock"
$ sudo socat "stdin,raw,echo=0,escape=0x11" "unix-connect:${console}"