dpvs源代码分析——main主流程(未完成,还需补充)

版权声明:本文为博主原创文章,未经博主允许不得转载。 https://blog.csdn.net/haolipengzhanshen/article/details/82555294

 第一篇博客我们简单介绍了下dpvs的编译,安装和部署使用,今天我们一起来走读下dpvs的主流程。

一、main函数总体概述

main函数的代码如下

int main(int argc, char *argv[])
{
    int err, nports;
    portid_t pid;
    struct netif_port *dev;
    struct timeval tv;
    char pql_conf_buf[LCORE_CONF_BUFFER_LEN];
    int pql_conf_buf_len = LCORE_CONF_BUFFER_LEN;
    uint32_t loop_cnt = 0;
    int timer_sched_loop_interval;

    /**
     * add application agruments parse before EAL ones.
     * use it like the following:
     * ./dpvs -v
     * OR
     * ./dpvs -- -n 4 -l 0-11 (if you want to use eal arguments)
     */
    err = parse_app_args(argc, argv);
    if (err < 0) {
        fprintf(stderr, "fail to parse application options\n");
        exit(EXIT_FAILURE);
    }
    argc -= err, argv += err;

    /* check if dpvs is running and remove zombie pidfile */
	/*检测dpvs是否已经运行*/
	if (dpvs_running(DPVS_PIDFILE)) {
        fprintf(stderr, "dpvs is already running\n");
        exit(EXIT_FAILURE);
    }

    dpvs_state_set(DPVS_STATE_INIT);

    gettimeofday(&tv, NULL);
    srandom(tv.tv_sec ^ tv.tv_usec ^ getpid());

	/*检测numa架构的socket数是否大于dpvs设置的最大值*/
    if (get_numa_nodes() > DPVS_MAX_SOCKET) {
        fprintf(stderr, "DPVS_MAX_SOCKET is smaller than system numa nodes!\n");
        return -1;
    }

    if (set_all_thread_affinity() != 0) {
        fprintf(stderr, "set_all_thread_affinity failed\n");
        exit(EXIT_FAILURE);
    }

	/*dpdk eal环境的初始化*/
    err = rte_eal_init(argc, argv);
    if (err < 0)
        rte_exit(EXIT_FAILURE, "Invalid EAL parameters\n");
    argc -= err, argv += err;

    RTE_LOG(INFO, DPVS, "dpvs version: %s, build on %s\n", DPVS_VERSION, DPVS_BUILD_DATE);

	/*dpdk 时间子系统初始化*/
    rte_timer_subsystem_init();

	/*配置文件初始化*/
    if ((err = cfgfile_init()) != EDPVS_OK)
        rte_exit(EXIT_FAILURE, "Fail init configuration file: %s\n",
                 dpvs_strerror(err));
    
    /*添加虚拟网络设备*/
    if ((err = netif_virtual_devices_add()) != EDPVS_OK)
        rte_exit(EXIT_FAILURE, "Fail add virtual devices:%s\n",
                 dpvs_strerror(err));

	/*定时器初始化,分为作用在slave核心的定时器和全局定时器*/
    if ((err = dpvs_timer_init()) != EDPVS_OK)
        rte_exit(EXIT_FAILURE, "Fail init timer on %s\n", dpvs_strerror(err));

	/*限速功能初始化*/
    if ((err = tc_init()) != EDPVS_OK)
        rte_exit(EXIT_FAILURE, "Fail to init traffic control: %s\n",
                 dpvs_strerror(err));

	/*网卡设备的相关初始化*/
    if ((err = netif_init(NULL)) != EDPVS_OK)
        rte_exit(EXIT_FAILURE, "Fail to init netif: %s\n", dpvs_strerror(err));
    /* Default lcore conf and port conf are used and may be changed here 
     * with "netif_port_conf_update" and "netif_lcore_conf_set" */

	/*控制平面初始化*/
    if ((err = ctrl_init()) != EDPVS_OK)
        rte_exit(EXIT_FAILURE, "Fail to init ctrl plane: %s\n",
                 dpvs_strerror(err));

	/*限速功能的控制面初始化*/
    if ((err = tc_ctrl_init()) != EDPVS_OK)
        rte_exit(EXIT_FAILURE, "Fail to init tc control plane: %s\n",
                 dpvs_strerror(err));

	/*vlan初始化*/
    if ((err = vlan_init()) != EDPVS_OK)
        rte_exit(EXIT_FAILURE, "Fail to init vlan: %s\n", dpvs_strerror(err));

	/*轻量级的ip协议栈,包括ip,arp,icmp,route,inet_addr等*/
    if ((err = inet_init()) != EDPVS_OK)
        rte_exit(EXIT_FAILURE, "Fail to init inet: %s\n", dpvs_strerror(err));

	/*网卡的Flow Director功能,根据不同的过滤条件,将流分入不同的队列*/
    if ((err = sa_pool_init()) != EDPVS_OK)
        rte_exit(EXIT_FAILURE, "Fail to init sa_pool: %s\n", dpvs_strerror(err));

	/*ip隧道初始化*/
    if ((err = ip_tunnel_init()) != EDPVS_OK)
        rte_exit(EXIT_FAILURE, "Fail to init tunnel: %s\n", dpvs_strerror(err));

	/**/
    if ((err = dp_vs_init()) != EDPVS_OK)
        rte_exit(EXIT_FAILURE, "Fail to init ipvs: %s\n", dpvs_strerror(err));

    if ((err = dpvs_firewall_init()) != EDPVS_OK)
        rte_exit(EXIT_FAILURE, "Fail to init firewall: %s\n", dpvs_strerror(err));

	/*netif模块的控制面初始化*/
    if ((err = netif_ctrl_init()) != EDPVS_OK)
        rte_exit(EXIT_FAILURE, "Fail to init netif_ctrl: %s\n",
                 dpvs_strerror(err));

    /* config and start all available dpdk ports */
    nports = rte_eth_dev_count();
    for (pid = 0; pid < nports; pid++) {
        dev = netif_port_get(pid);
        if (!dev) {
            RTE_LOG(WARNING, DPVS, "port %d not found\n", pid);
            continue;
        }

		/*配置发送队列,接收队列,将网卡配置ok后,启动*/
        err = netif_port_start(dev);
        if (err != EDPVS_OK)
            RTE_LOG(WARNING, DPVS, "Start %s failed, skipping ...\n",
                    dev->name);
    }

    /* print port-queue-lcore relation */
    netif_print_lcore_conf(pql_conf_buf, &pql_conf_buf_len, true, 0);
    RTE_LOG(INFO, DPVS, "\nport-queue-lcore relation array: \n%s\n",
            pql_conf_buf);

    /* start data plane threads */
	/*启动数据面线程,用于处理从网卡上接收到的数据*/
    netif_lcore_start();

    /* write pid file */
    if (!pidfile_write(DPVS_PIDFILE, getpid()))
        goto end;

	/*定时器调度的精度*/
    timer_sched_loop_interval = dpvs_timer_sched_interval_get();
    assert(timer_sched_loop_interval > 0);

    dpvs_state_set(DPVS_STATE_NORMAL);

    /* start control plane thread */
    while (1) {
        /* reload configuations if reload flag is set */
        try_reload();
		
        /* IPC loop */
        sockopt_ctl(NULL);
	
        /* msg loop */
        msg_master_process();

        /* timer */
        loop_cnt++;
        if (loop_cnt % timer_sched_loop_interval == 0)
            rte_timer_manage();
		
        /* kni 处理kni接口上的数据*/
        kni_process_on_master();

        /* process mac ring on master */
        neigh_process_ring(NULL);
 
        /* increase loop counts */
        netif_update_master_loop_cnt();
    }

end:
    dpvs_state_set(DPVS_STATE_FINISH);
    if ((err = netif_ctrl_term()) !=0 )
        rte_exit(EXIT_FAILURE, "Fail to term netif_ctrl: %s\n",
                 dpvs_strerror(err));
    if ((err = dp_vs_term()) != EDPVS_OK)
        RTE_LOG(ERR, DPVS, "Fail to term ipvs: %s\n", dpvs_strerror(err));
    if ((err = ip_tunnel_term()) != EDPVS_OK)
        RTE_LOG(ERR, DPVS, "Fail to term tunnel: %s\n", dpvs_strerror(err));
    if ((err = sa_pool_term()) != EDPVS_OK)
        RTE_LOG(ERR, DPVS, "Fail to term sa_pool: %s\n", dpvs_strerror(err));
    if ((err = inet_term()) != EDPVS_OK)
        RTE_LOG(ERR, DPVS, "Fail to term inet: %s\n", dpvs_strerror(err));
    if ((err = dpvs_timer_term()) != EDPVS_OK)
        RTE_LOG(ERR, DPVS, "Fail to term timer: %s\n", dpvs_strerror(err));
    if ((err = ctrl_term()) != 0)
        RTE_LOG(ERR, DPVS, "Fail to term ctrl plane\n");
    if ((err = netif_term()) != 0)
        RTE_LOG(ERR, DPVS, "Fail to term route\n");
    if ((err = cfgfile_term()) != 0)
        RTE_LOG(ERR, DPVS, "Fail to term configuration file: %s\n",
                dpvs_strerror(err));
    pidfile_rm(DPVS_PIDFILE);

    exit(0);
}

parse_app_args():解析应用程序的参数,可自行添加代码

set_all_thread_affinity():将main线程设置成系统最终的状态,即可被调度到每个cpu核心上

rte_eal_init():dpdk eal环境抽象层的初始化

rte_timer_subsystem_init():时间子系统的初始化,想使用dpdk的定时器,必须调用此函数,老流弊了!

cfgfile_init():配置文件的初始化,从配置文件中读取配置,这个我后续单开一篇博客来讲解dpvs的配置文件的设计

dpvs_timer_init():定时器初始化,分为每个slave核心的定时器和全局定时器

tc_init():限速功能初始化

netif_init():网卡设备的相关初始化

ctrl_init():控制平面初始化

tc_ctrl_init():限速功能的控制面初始化

vlan_init():vlan初始化

inet_init():轻量级的ip协议栈,包括ip,arp,icmp,route,inet_addr等

sa_pool_init():网卡的Flow Director功能,根据不同的过滤条件,将流分入不同的队列

ip_tunnel_init():ip隧道初始化,ip隧道也是ddos防护的一种方式

dp_vs_init():dpvs内部的初始化,udp,tcp,icmp协议的初始化,连接表connlist的初始化,黑名单的初始化等等。

netif_ctrl_init():netif模块的控制面初始化

 /* config and start all available dpdk ports */
    nports = rte_eth_dev_count();
    for (pid = 0; pid < nports; pid++) {
        dev = netif_port_get(pid);
        if (!dev) {
            RTE_LOG(WARNING, DPVS, "port %d not found\n", pid);
            continue;
        }

		/* set up the TX and RX queues and, once the NIC is configured, start it */
        err = netif_port_start(dev);
        if (err != EDPVS_OK)
            RTE_LOG(WARNING, DPVS, "Start %s failed, skipping ...\n",
                    dev->name);
    }

调用rte_eth_dev_count()获取系统可用的端口数,然后对每个端口调用netif_port_start(dev)函数,该函数的流程如下:

1. rte_eth_dev_configure设置网卡的属性,如发送队列数、接收队列数,以及rte_eth_conf结构体中的配置

2. 创建发送队列,创建接收队列

3. netif_print_port_conf打印conf配置信息,防止手误配置出错

4.build_port_queue_lcore_map函数是建立端口-队列-cpu逻辑核心三者之间的映射关系。

5.rte_eth_dev_start函数来启动设备

6.获取网卡的启动状态。rte_eth_link_get_nowait函数用于获取链路层的状态,非等待版本。

7.如果开启了混杂模式,则调用rte_eth_promiscuous_enable函数开启混杂模式。

将重点介绍下面几个模块的初始化过程

二、netif_init模块初始化

/*
 * Initialise the netif module by running its sub-initialisers in a fixed
 * order.
 *
 * @conf: passed straight to netif_port_init(); per the note in the body,
 *        NULL selects the default port configuration.
 *
 * Return: always EDPVS_OK -- no result of the helper calls is checked here.
 */
int netif_init(const struct rte_eth_conf *conf)
{
	/* cache the timer frequency (ticks per second) from rte_get_timer_hz() */
    cycles_per_sec = rte_get_timer_hz();
	
	/* author's note: creates the mbuf pools (per NUMA socket) used to receive packets */
    netif_pktmbuf_pool_init();

	/* author's note: creates the rings used to receive ARP packets */
    netif_arp_ring_init();

	/* author's note: initialises pkt_type_tab, the table of registered packet types */
    netif_pkt_type_tab_init();

	/* author's note: initialises the job list netif_lcore_jobs */
    netif_lcore_jobs_init();
	
    // use default port conf if conf=NULL
    netif_port_init(conf);
	
	/* lcore setup; the surrounding article discusses this call in detail */
    netif_lcore_init();
	
    return EDPVS_OK;
}

 重点讲解下netif_lcore_init函数

首先check_lcore_conf函数,检测下conf配置是否正确

/* register lcore jobs: fill in three NETIF_LCORE_JOB_LOOP jobs, then register them */
    /* job 0: "recv_fwd" -> lcore_job_recv_fwd */
    snprintf(netif_jobs[0].name, sizeof(netif_jobs[0].name) - 1, "%s", "recv_fwd");
    netif_jobs[0].func = lcore_job_recv_fwd;
    netif_jobs[0].data = NULL;
    netif_jobs[0].type = NETIF_LCORE_JOB_LOOP;
    /* job 1: "xmit" -> lcore_job_xmit */
    snprintf(netif_jobs[1].name, sizeof(netif_jobs[1].name) - 1, "%s", "xmit");
    netif_jobs[1].func = lcore_job_xmit;
    netif_jobs[1].data = NULL;
    netif_jobs[1].type = NETIF_LCORE_JOB_LOOP;
    /* job 2: "timer_manage" -> lcore_job_timer_manage */
    snprintf(netif_jobs[2].name, sizeof(netif_jobs[2].name) - 1, "%s", "timer_manage");
    netif_jobs[2].func = lcore_job_timer_manage;
    netif_jobs[2].data = NULL;
    netif_jobs[2].type = NETIF_LCORE_JOB_LOOP;

    /* register every job; a single failure is fatal for the process */
    for (ii = 0; ii < NETIF_JOB_COUNT; ii++) {
        res = netif_lcore_loop_job_register(&netif_jobs[ii]);
        if (res < 0) {
            rte_exit(EXIT_FAILURE, 
                    "[%s] Fail to register netif lcore jobs, exiting ...\n", __func__);
            /* NOTE(review): rte_exit() presumably does not return, which
             * would make this break unreachable -- confirm */
            break;
        }
    }

 在每个cpu逻辑核心上注册job任务来执行,关于这块的详细分析请参考链接

https://blog.csdn.net/haolipengzhanshen/article/details/82414350

三、ctrl_init控制模块初始化

/*
 * Bring up the control plane.
 *
 * Initialises the lock of the multicast wait queue, then the message
 * module, then the sockopt module.
 *
 * Return: EDPVS_OK on success, otherwise the negative code returned by
 *         the first module that failed (an error is logged for it).
 */
int ctrl_init(void)
{
    int status;

    /* rwlock protecting the multicast wait queue */
    rte_rwlock_init(&mc_wait_lock);

    /* message resources */
    if (unlikely((status = msg_init()) < 0)) {
        RTE_LOG(ERR, MSGMGR, "%s: msg module initialization failed!\n", __func__);
        return status;
    }

    /* sockopt side of the control plane */
    if (unlikely((status = sockopt_init()) < 0)) {
        RTE_LOG(ERR, MSGMGR, "%s: sockopt module initialization failed!\n", __func__);
        return status;
    }

    return EDPVS_OK;
}

 转到msg_init函数,看看具体实现

/*
 * Set up the inter-lcore message module.
 *
 * Steps, in order: record the master lcore and the slave lcore set, reset
 * the per-lcore message-type lists and their locks, reset the multicast
 * wait queue, create one message ring per lcore, register the slave-side
 * control job and finally register the built-in message types.
 *
 * Return: EDPVS_OK on success; EDPVS_NOTSUPP when DPVS_MAX_LCORE exceeds
 *         MSG_MAX_LCORE_SUPPORTED; EDPVS_INVAL when more than 64 slave
 *         lcores are configured; EDPVS_DPDKAPIFAIL when a ring cannot be
 *         created; or the negative code from the job registration.
 */
static inline int msg_init(void)
{
    int lcore, type;
    int rc;
    char name[16];
    char dump[4096];

    if (DPVS_MAX_LCORE > MSG_MAX_LCORE_SUPPORTED)
        return EDPVS_NOTSUPP;

    /* lcore bookkeeping starts from a clean state */
    slave_lcore_mask = 0;
    slave_lcore_nb = 0;
    master_lcore = rte_get_master_lcore();

    /* fetch the number and the mask of the configured slave lcores */
    netif_get_slave_lcores(&slave_lcore_nb, &slave_lcore_mask);
    if (slave_lcore_nb > 64) {
        RTE_LOG(ERR, MSGMGR, "%s: only %d lcores supported for ctrl\n", __func__, 64);
        return EDPVS_INVAL;
    }

    /* one list head and one rwlock per (lcore, message type) slot */
    for (lcore = 0; lcore < DPVS_MAX_LCORE; lcore++) {
        for (type = 0; type < DPVS_MSG_LEN; type++) {
            INIT_LIST_HEAD(&mt_array[lcore][type]);
            rte_rwlock_init(&mt_lock[lcore][type]);
        }
    }

    /* multicast wait queue starts empty with its full quota of free slots */
    mc_wait_list.free_cnt = msg_mc_qlen;
    INIT_LIST_HEAD(&mc_wait_list.list);

    /* one message ring per lcore, named "msg_ring_<lcore>" */
    for (lcore = 0; lcore < DPVS_MAX_LCORE; lcore++) {
        snprintf(name, sizeof(name), "msg_ring_%d", lcore);
        msg_ring[lcore] = rte_ring_create(name, msg_ring_size,
                                          rte_socket_id(), 0/*RING_F_SC_DEQ*/);
        if (unlikely(msg_ring[lcore] == NULL)) {
            RTE_LOG(ERR, MSGMGR, "Fail to init ctrl !\n");
            return EDPVS_DPDKAPIFAIL;
        }
    }

    /* slave lcores service their control messages from a loop job */
    snprintf(ctrl_lcore_job.name, sizeof(ctrl_lcore_job.name) - 1, "%s", "slave_ctrl_plane");
    ctrl_lcore_job.func = slave_lcore_loop_func;
    ctrl_lcore_job.data = NULL;
    ctrl_lcore_job.type = NETIF_LCORE_JOB_LOOP;
    rc = netif_lcore_loop_job_register(&ctrl_lcore_job);
    if (rc < 0) {
        RTE_LOG(ERR, MSGMGR, "%s: fail to register ctrl func on slave lcores\n", __func__);
        return rc;
    }

    /* built-in message types, followed by a log dump of the type table */
    register_built_in_msg();
    msg_type_table_print(dump, sizeof(dump));
    RTE_LOG(INFO, MSGMGR, "%s: built-in msg registered:\n%s\n", __func__, dump);

    return EDPVS_OK;
}

1)初始化mt_array二维数组,用于存储不同lcore核心上的不同消息类型的dpvs_msg_type结构体元素,一维下标是cpu逻辑核心lcoreid,二维下标是消息类型。

2)初始化mt_lock二维数组,是给mt_array加锁用的,和mt_array一一对应。

3)初始化多播等待队列mc_wait_list 

4)为每个lcore核心创建队列,用于接收消息

5)将ctrl_lcore_job结构体注册到系统中,在netif_loop中注册的回调函数会被调用。

6)register_built_in_msg为注册内置消息类型。消息类型有MSG_TYPE_REG和MSG_TYPE_UNREG,对应的回调函数是msg_type_reg_cb和msg_type_unreg_cb

四、inet_init轻量级ip-stack模块初始化

/*
 * Initialise the lightweight IP stack.
 *
 * Runs the neighbour, route, IPv4, ICMP and inet-address initialisers in
 * that order and stops at the first one reporting a non-zero result.
 *
 * Return: EDPVS_OK when every step succeeds, otherwise the code returned
 *         by the failing step.
 */
int inet_init(void)
{
    int rc;

    /* neighbour (ARP) layer */
    rc = neigh_init();
    if (rc != 0)
        return rc;

    /* routing */
    rc = route_init();
    if (rc != 0)
        return rc;

    /* IPv4 */
    rc = ipv4_init();
    if (rc != 0)
        return rc;

    /* ICMP */
    rc = icmp_init();
    if (rc != 0)
        return rc;

    /* network addresses */
    rc = inet_addr_init();
    if (rc != 0)
        return rc;

    return EDPVS_OK;
}

 看上图,下面分别介绍inet_init中每个模块的初始化

4.1 arp初始化

/*
 * Initialise the neighbour (ARP) module.
 *
 * Resets the per-lcore neighbour hash table, selects the lowest-numbered
 * slave lcore as the one that syncs with the master, registers the ARP
 * packet handler and the neighbour sockopts, initialises the neighbour
 * rings and registers the slow job that drains them.
 *
 * Return: EDPVS_OK on success, otherwise the code returned by the first
 *         registration that failed.
 */
static int arp_init(void)
{
    int i, j;
    int err;
    uint64_t lcore_mask;
    lcoreid_t cid;

    /* every bucket of every lcore's neighbour table starts empty */
    for (i = 0; i < DPVS_MAX_LCORE; i++) {
        for (j = 0; j < ARP_TAB_SIZE; j++) {
            INIT_LIST_HEAD(&neigh_table[i][j]);
        }
    }

    /* choose one core to sync master: the first lcore set in the slave mask */
    netif_get_slave_lcores(NULL, &lcore_mask);

    /*
     * The mask is 64 bits wide, so the probe bit is built as a uint64_t
     * (a signed long shift is undefined for the top bit, and for bits >= 32
     * where long is 32-bit) and the scan never shifts past the mask width.
     */
    for (cid = 0; cid < DPVS_MAX_LCORE && cid < 8 * sizeof(lcore_mask); cid++) {
        if (lcore_mask & ((uint64_t)1 << cid)) {
            g_cid = cid;
            break;
        }
    }

    /* the lcore running this init is recorded as the master */
    master_cid = rte_lcore_id();

    arp_pkt_type.type = rte_cpu_to_be_16(ETHER_TYPE_ARP);

    /* handler for packets of the ARP ether type */
    if ((err = netif_register_pkt(&arp_pkt_type)) != EDPVS_OK)
        return err;
    /* control-plane get/set callbacks */
    if ((err = sockopt_register(&neigh_sockopts)) != EDPVS_OK)
        return err;

    /* neighbour rings */
    neigh_ring_init();

    /* get static arp entry from master: neigh_process_ring runs as a slow
     * job, skipping NEIGH_PROCESS_MAC_RING_INTERVAL loops between runs */
    snprintf(neigh_sync_job.name, sizeof(neigh_sync_job.name) - 1, "%s", "neigh_sync");
    neigh_sync_job.func = neigh_process_ring;
    neigh_sync_job.data = NULL;
    neigh_sync_job.type = NETIF_LCORE_JOB_SLOW;
    neigh_sync_job.skip_loops = NEIGH_PROCESS_MAC_RING_INTERVAL;
    err = netif_lcore_loop_job_register(&neigh_sync_job);
    if (err != EDPVS_OK)
        return err;

    return EDPVS_OK;
}

4.2 路由功能初始化

4.3 ip协议初始化

4.4 icmp协议初始化

4.5 网络地址初始化

猜你喜欢

转载自blog.csdn.net/haolipengzhanshen/article/details/82555294