版权声明:本文为博主原创文章,未经博主允许不得转载。 https://blog.csdn.net/u012570105/article/details/82594089
https://www.cnblogs.com/ZCplayground/p/9381961.html
l3fwd example分析
- 编译
export RTE_KERNELDIR=/usr/src/linux-xxx
make T=x86_64-native-linuxapp-gcc O=x86_64-native-linuxapp-gcc -j16
make examples T=x86_64-native-linuxapp-gcc O=x86_64-native-linuxapp-gcc -j16
- 基本使用
$./build/l3fwd [EAL options] --
-p PORTMASK [-P] [-E] [-L]
--config (port,queue,lcore)[,(port,queue,lcore)]
[--eth-dest=X,MM:MM:MM:MM:MM:MM]
[--enable-jumbo [--max-pkt-len PKTLEN]]
[--no-numa]
[--hash-entry-num 0x0n]
[--ipv6]
[--parse-ptype]
-E: selects the Exact Match lookup method.
-L: selects the LPM lookup method.
-
example图示
- 显示了L3fwd应用程序的框图,该应用程序用于使用两个端口从流量生成器转发数据包。
- 最长前缀匹配(LPM)是一种表搜索方法,通常用于查找IP转发应用程序中的最佳路由匹配。L3fwd应用程序静态地配置一组规则,并在初始化时将它们加载到LPM对象中。默认情况下,L3fwd有一个静态定义的目标LPM表,包含8条路由。L3fwd使用数据包的IPv4目的地址来确定下一跳;LPM表的查找结果即为输出端口ID。它还可以基于IPv6地址进行路由(从DPDK 17.05开始)。如下图:
- 精确匹配(EM)是一种基于哈希的表搜索方法,用于查找IP转发应用程序中的最佳路由匹配。在EM查找中,搜索键由五个元组值表示,即源IP地址、目标IP地址、源端口、目标端口和协议。应用程序使用的流集是静态配置的,并在初始化时加载到hash对象中。默认情况下,L3fwd有一个静态定义的目标EM表,有4条路由,如下图:
- 显示了L3fwd应用程序的框图,该应用程序用于使用两个端口从流量生成器转发数据包。
-
代码分析
main.c
/*-
* BSD LICENSE
*
* Copyright(c) 2010-2016 Intel Corporation. All rights reserved.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
*
* * Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in
* the documentation and/or other materials provided with the
* distribution.
* * Neither the name of Intel Corporation nor the names of its
* contributors may be used to endorse or promote products derived
* from this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
* OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#include <stdio.h>
#include <stdlib.h>
#include <stdint.h>
#include <inttypes.h>
#include <sys/types.h>
#include <string.h>
#include <sys/queue.h>
#include <stdarg.h>
#include <errno.h>
#include <getopt.h>
#include <signal.h>
#include <stdbool.h>
#include <rte_common.h>
#include <rte_vect.h>
#include <rte_byteorder.h>
#include <rte_log.h>
#include <rte_memory.h>
#include <rte_memcpy.h>
#include <rte_memzone.h>
#include <rte_eal.h>
#include <rte_per_lcore.h>
#include <rte_launch.h>
#include <rte_atomic.h>
#include <rte_cycles.h>
#include <rte_prefetch.h>
#include <rte_lcore.h>
#include <rte_per_lcore.h>
#include <rte_branch_prediction.h>
#include <rte_interrupts.h>
#include <rte_pci.h>
#include <rte_random.h>
#include <rte_debug.h>
#include <rte_ether.h>
#include <rte_ethdev.h>
#include <rte_ring.h>
#include <rte_mempool.h>
#include <rte_mbuf.h>
#include <rte_ip.h>
#include <rte_tcp.h>
#include <rte_udp.h>
#include <rte_string_fns.h>
#include <rte_cpuflags.h>
#include <cmdline_parse.h>
#include <cmdline_parse_etheraddr.h>
#include "l3fwd.h"
/*
 * Configurable number of RX/TX ring descriptors.
 * These are the initial values of nb_rxd / nb_txd and also feed the
 * NB_MBUF pool-size estimate.
 */
#define RTE_TEST_RX_DESC_DEFAULT 128
#define RTE_TEST_TX_DESC_DEFAULT 512
/* Cap applied in main() to the number of TX queues configured per port. */
#define MAX_TX_QUEUE_PER_PORT RTE_MAX_ETHPORTS
/* check_lcore_params() rejects any RX queue id >= this value. */
#define MAX_RX_QUEUE_PER_PORT 128
/* Capacity of lcore_params_array: max (port,queue,lcore) tuples from --config. */
#define MAX_LCORE_PARAMS 1024
/* Static global variables used within this file. */
/* Descriptor counts passed to rte_eth_rx_queue_setup / rte_eth_tx_queue_setup. */
static uint16_t nb_rxd = RTE_TEST_RX_DESC_DEFAULT;
static uint16_t nb_txd = RTE_TEST_TX_DESC_DEFAULT;
/**< Promiscuous mode is off by default; set to 1 by the -P option. */
static int promiscuous_on;
/* Select Longest-Prefix or Exact match (set by -L / -E; mutually exclusive). */
static int l3fwd_lpm_on;
static int l3fwd_em_on;
static int numa_on = 1; /**< NUMA is enabled by default; cleared by --no-numa. */
static int parse_ptype; /**< Parse packet type using rx callback, and */
/**< disabled by default (enabled by --parse-ptype) */
/* Global variables (shared with the other translation units via l3fwd.h). */
/* Set to true by signal_handler() on SIGINT/SIGTERM. */
volatile bool force_quit;
/* ethernet addresses of ports */
/* Per-port destination MAC, stored in the low 6 bytes of a 64-bit word. */
uint64_t dest_eth_addr[RTE_MAX_ETHPORTS];
/* Per-port MAC address as read back by rte_eth_macaddr_get() in main(). */
struct ether_addr ports_eth_addr[RTE_MAX_ETHPORTS];
/*
 * Per-port vector whose first 8 bytes are written from dest_eth_addr[] and
 * whose bytes 6..11 are then overwritten with the port's own MAC in main().
 */
xmm_t val_eth[RTE_MAX_ETHPORTS];
/* mask of enabled ports (bit N set => port N enabled; set by -p) */
uint32_t enabled_port_mask;
/* Used only in exact match mode; parse_args() resets both when LPM is selected. */
int ipv6; /**< ipv6 is false by default. */
uint32_t hash_entry_number = HASH_ENTRY_NUMBER_DEFAULT;
/* Per-lcore forwarding configuration, indexed by lcore id. */
struct lcore_conf lcore_conf[RTE_MAX_LCORE];
/*
 * One (port, queue, lcore) assignment: RX queue `queue_id` of port `port_id`
 * is to be polled by lcore `lcore_id`. All three ids are limited to 0..255
 * by the field width (parse_config() rejects larger values).
 */
struct lcore_params {
uint8_t port_id;
uint8_t queue_id;
uint8_t lcore_id;
} __rte_cache_aligned;
/* Storage filled by parse_config() when --config is given. */
static struct lcore_params lcore_params_array[MAX_LCORE_PARAMS];
/* Built-in {port, queue, lcore} assignments used when --config is absent. */
static struct lcore_params lcore_params_array_default[] = {
{0, 0, 2},
{0, 1, 2},
{0, 2, 2},
{1, 0, 2},
{1, 1, 2},
{1, 2, 2},
{2, 0, 2},
{3, 0, 3},
{3, 1, 3},
};
/* Active assignment table; parse_config() repoints it at lcore_params_array. */
static struct lcore_params * lcore_params = lcore_params_array_default;
/* Number of valid entries in the table that lcore_params points to. */
static uint16_t nb_lcore_params = sizeof(lcore_params_array_default) /
sizeof(lcore_params_array_default[0]);
/*
 * Ethernet device configuration handed to rte_eth_dev_configure() for every
 * enabled port. parse_args() may later set rxmode.jumbo_frame and
 * rxmode.max_rx_pkt_len when --enable-jumbo is given.
 */
static struct rte_eth_conf port_conf = {
.rxmode = {
.mq_mode = ETH_MQ_RX_RSS,
.max_rx_pkt_len = ETHER_MAX_LEN,
.split_hdr_size = 0,
.header_split = 0, /**< Header Split disabled */
.hw_ip_checksum = 1, /**< IP checksum offload enabled */
.hw_vlan_filter = 0, /**< VLAN filtering disabled */
.jumbo_frame = 0, /**< Jumbo Frame Support disabled */
.hw_strip_crc = 0, /**< Hardware CRC stripping disabled (value is 0) */
},
.rx_adv_conf = {
.rss_conf = {
.rss_key = NULL,
.rss_hf = ETH_RSS_IP,
},
},
.txmode = {
.mq_mode = ETH_MQ_TX_NONE,
},
};
/* One mbuf pool per socket id, created on first use by init_mem(). */
static struct rte_mempool * pktmbuf_pool[NB_SOCKETS];
/*
 * Table of entry points for one lookup method (exact match or LPM).
 * main() and its helpers call through the active copy, l3fwd_lkp.
 */
struct l3fwd_lkp_mode {
/* Called by init_mem() with a socket id after that socket's mbuf pool is created. */
void (*setup)(int);
/* Called with a port id; a non-zero result is treated as "ptype supported". */
int (*check_ptype)(int);
/* RX callback registered per queue when --parse-ptype is given. */
rte_rx_callback_fn cb_parse_ptype;
/* Per-lcore entry point launched through rte_eal_mp_remote_launch(). */
int (*main_loop)(void *);
/* Return the IPv4 / IPv6 lookup structure for a socket id. */
void* (*get_ipv4_lookup_struct)(int);
void* (*get_ipv6_lookup_struct)(int);
};
/* Active lookup method; filled in by setup_l3fwd_lookup_tables(). */
static struct l3fwd_lkp_mode l3fwd_lkp;
/* Entry points for exact-match (hash) forwarding. */
static struct l3fwd_lkp_mode l3fwd_em_lkp = {
.setup = setup_hash,
.check_ptype = em_check_ptype,
.cb_parse_ptype = em_cb_parse_ptype,
.main_loop = em_main_loop,
.get_ipv4_lookup_struct = em_get_ipv4_l3fwd_lookup_struct,
.get_ipv6_lookup_struct = em_get_ipv6_l3fwd_lookup_struct,
};
/* Entry points for longest-prefix-match forwarding. */
static struct l3fwd_lkp_mode l3fwd_lpm_lkp = {
.setup = setup_lpm,
.check_ptype = lpm_check_ptype,
.cb_parse_ptype = lpm_cb_parse_ptype,
.main_loop = lpm_main_loop,
.get_ipv4_lookup_struct = lpm_get_ipv4_l3fwd_lookup_struct,
.get_ipv6_lookup_struct = lpm_get_ipv6_l3fwd_lookup_struct,
};
/*
 * Select the lookup method used for forwarding by copying the matching
 * function table into l3fwd_lkp: exact match when l3fwd_em_on is set,
 * longest-prefix match otherwise.
 */
static void
setup_l3fwd_lookup_tables(void)
{
	/* HASH (exact-match) functions if requested, LPM functions otherwise. */
	l3fwd_lkp = l3fwd_em_on ? l3fwd_em_lkp : l3fwd_lpm_lkp;
}
/*
 * Validate every (port, queue, lcore) entry of the active lcore_params table.
 *
 * Returns 0 when all entries are acceptable, -1 (after printing a message)
 * when a queue id is >= MAX_RX_QUEUE_PER_PORT or an lcore is not enabled.
 * An lcore on a non-zero socket while NUMA is off only produces a warning.
 */
static int
check_lcore_params(void)
{
	uint16_t i;

	for (i = 0; i < nb_lcore_params; ++i) {
		const uint8_t queue = lcore_params[i].queue_id;
		const uint8_t lcore = lcore_params[i].lcore_id;
		int socketid;

		if (queue >= MAX_RX_QUEUE_PER_PORT) {
			printf("invalid queue number: %hhu\n", queue);
			return -1;
		}
		if (!rte_lcore_is_enabled(lcore)) {
			printf("error: lcore %hhu is not enabled in lcore mask\n", lcore);
			return -1;
		}
		/*
		 * Fetch the socket id first and compare afterwards: assigning
		 * inside the condition stored the result of "!= 0" (0 or 1)
		 * instead of the real socket id, so the warning was wrong.
		 */
		socketid = rte_lcore_to_socket_id(lcore);
		if (socketid != 0 && numa_on == 0) {
			printf("warning: lcore %hhu is on socket %d with numa off \n",
				lcore, socketid);
		}
	}
	return 0;
}
/*
 * Check that every port referenced by the active lcore_params table is both
 * enabled in enabled_port_mask and present on the board.
 *
 * nb_ports: number of ethernet ports detected.
 * Returns 0 on success, -1 (after printing a message) on the first bad port.
 */
static int
check_port_config(const unsigned nb_ports)
{
	/* Number of ports representable in enabled_port_mask. */
	const unsigned mask_bits = sizeof(enabled_port_mask) * 8;
	unsigned portid;
	uint16_t i;

	for (i = 0; i < nb_lcore_params; ++i) {
		portid = lcore_params[i].port_id;
		/*
		 * A port id beyond the mask width cannot be enabled; test it
		 * explicitly because shifting by >= the type width is undefined.
		 */
		if (portid >= mask_bits ||
				(enabled_port_mask & (1u << portid)) == 0) {
			printf("port %u is not enabled in port mask\n", portid);
			return -1;
		}
		if (portid >= nb_ports) {
			printf("port %u is not present on the board\n", portid);
			return -1;
		}
	}
	return 0;
}
static uint8_t
get_port_n_rx_queues(const uint8_t port)
{
int queue = -1;
uint16_t i;
for (i = 0; i < nb_lcore_params; ++i) {
if (lcore_params[i].port_id == port) {
if (lcore_params[i].queue_id == queue+1)
queue = lcore_params[i].queue_id;
else
rte_exit(EXIT_FAILURE, "queue ids of the port %d must be"
" in sequence and must start with 0\n",
lcore_params[i].port_id);
}
}
return (uint8_t)(++queue);
}
static int
init_lcore_rx_queues(void)
{
uint16_t i, j, nb_rx_queue;
uint8_t lcore;
for (i = 0; i < nb_lcore_params; ++i) {
lcore = lcore_params[i].lcore_id;
nb_rx_queue = lcore_conf[lcore].n_rx_queue;
if (nb_rx_queue >= MAX_RX_QUEUE_PER_LCORE) {
printf("error: too many queues (%u) for lcore: %u\n",
(unsigned)nb_rx_queue + 1, (unsigned)lcore);
return -1;
} else {
lcore_conf[lcore].rx_queue_list[nb_rx_queue].port_id =
lcore_params[i].port_id;
lcore_conf[lcore].rx_queue_list[nb_rx_queue].queue_id =
lcore_params[i].queue_id;
lcore_conf[lcore].n_rx_queue++;
}
}
// dump出每个逻辑核的收发收包队列分配
printf("RTE_MAX_LCORE = %d\n", RTE_MAX_LCORE);
for (i = 0; i < RTE_MAX_LCORE; ++i) {
for (j = 0; j < lcore_conf[i].n_rx_queue; j++) {
printf("lcore_conf[%d]: rx_q_idx = %d, queue_id = %d, rx_port_id=%d\n", i, \
j, lcore_conf[i].rx_queue_list[j].queue_id, lcore_conf[i].rx_queue_list[j].port_id);
}
}
return 0;
}
/*
 * Print the command-line synopsis and a one-line description per option.
 *
 * prgname: program name shown at the start of the synopsis.
 *
 * The synopsis lists every option parse_args() accepts; flags that
 * parse_args() does not require are shown in brackets.
 */
static void
print_usage(const char *prgname)
{
	printf("%s [EAL options] -- -p PORTMASK [-P] [-E] [-L]"
		" [--config (port,queue,lcore)[,(port,queue,lcore)]]"
		" [--eth-dest=X,MM:MM:MM:MM:MM:MM]"
		" [--enable-jumbo [--max-pkt-len PKTLEN]]"
		" [--no-numa] [--hash-entry-num 0xN] [--ipv6] [--parse-ptype]\n"
		" -p PORTMASK: hexadecimal bitmask of ports to configure\n"
		" -P : enable promiscuous mode\n"
		" -E : enable exact match\n"
		" -L : enable longest prefix match\n"
		" --config (port,queue,lcore): rx queues configuration\n"
		" --eth-dest=X,MM:MM:MM:MM:MM:MM: optional, ethernet destination for port X\n"
		" --no-numa: optional, disable numa awareness\n"
		" --ipv6: optional, specify it if running ipv6 packets\n"
		" --enable-jumbo: enable jumbo frame"
		" which max packet len is PKTLEN in decimal (64-9600)\n"
		" --hash-entry-num: specify the hash entry number in hexadecimal to be setup\n"
		" --parse-ptype: optional, parse packet type in software using an rx callback\n",
		prgname);
}
/*
 * Parse a decimal packet length.
 *
 * pktlen: NUL-terminated string holding the number.
 * Returns the parsed value (> 0), or -1 when the string is empty, contains
 * trailing non-digit characters, is zero, or does not fit in the int return
 * type. The caller applies the 64..MAX_JUMBO_PKT_LEN range check.
 */
static int
parse_max_pkt_len(const char *pktlen)
{
	char *end = NULL;
	unsigned long len;

	/* parse decimal string */
	errno = 0;
	len = strtoul(pktlen, &end, 10);
	if ((pktlen[0] == '\0') || (end == NULL) || (*end != '\0'))
		return -1;
	/*
	 * Reject values that would be truncated by the conversion to int;
	 * otherwise e.g. 2^32 + 1500 would be accepted as 1500.
	 */
	if (errno == ERANGE || len == 0 || len > INT32_MAX)
		return -1;
	return (int)len;
}
/*
 * Parse a hexadecimal port mask.
 *
 * portmask: NUL-terminated string holding the mask (an optional "0x" prefix
 *           is accepted by strtoul in base 16).
 * Returns the mask, or 0 when the string is empty, has trailing garbage,
 * overflows, or does not fit in 32 bits.
 *
 * 0 (not -1) is the error value because the caller stores the result in the
 * unsigned enabled_port_mask and tests it against 0; a -1 would be read back
 * as "all ports enabled" and the invalid input would go unnoticed.
 */
static int
parse_portmask(const char *portmask)
{
	char *end = NULL;
	unsigned long pm;

	/* parse hexadecimal string */
	errno = 0;
	pm = strtoul(portmask, &end, 16);
	if ((portmask[0] == '\0') || (end == NULL) || (*end != '\0'))
		return 0;
	/* enabled_port_mask is 32 bits wide; anything larger is invalid. */
	if (errno == ERANGE || pm > UINT32_MAX)
		return 0;
	return (int)pm;
}
/*
 * Parse the hexadecimal value of --hash-entry-num.
 *
 * hash_entry_num: NUL-terminated string holding the number.
 * Returns the parsed value (> 0), or -1 when the string is empty, contains
 * trailing garbage, is zero, or does not fit in the int return type. The
 * caller applies the upper bound L3FWD_HASH_ENTRIES.
 */
static int
parse_hash_entry_number(const char *hash_entry_num)
{
	char *end = NULL;
	unsigned long hash_en;

	/* parse hexadecimal string */
	errno = 0;
	hash_en = strtoul(hash_entry_num, &end, 16);
	if ((hash_entry_num[0] == '\0') || (end == NULL) || (*end != '\0'))
		return -1;
	/*
	 * Reject values that would be truncated by the conversion to int;
	 * otherwise e.g. 0x100000004 would be accepted as 4.
	 */
	if (errno == ERANGE || hash_en == 0 || hash_en > INT32_MAX)
		return -1;
	return (int)hash_en;
}
/*
 * Parse the --config argument, a list of "(port,queue,lcore)" tuples, into
 * lcore_params_array and make it the active lcore_params table.
 *
 * q_arg: the option argument, e.g. "(0,0,1),(1,0,2)".
 * Returns 0 on success, -1 on a malformed tuple, a field above 255, or more
 * than MAX_LCORE_PARAMS tuples. nb_lcore_params is reset on entry, so it is
 * not meaningful after a failure.
 */
static int
parse_config(const char *q_arg)
{
	char s[256];
	const char *p, *p0 = q_arg;
	char *end;
	enum fieldnames {
		FLD_PORT = 0,
		FLD_QUEUE,
		FLD_LCORE,
		NUM_FLD
	};
	unsigned long int_fld[NUM_FLD];
	char *str_fld[NUM_FLD];
	int i;
	unsigned size;

	nb_lcore_params = 0;

	while ((p = strchr(p0, '(')) != NULL) {
		++p;
		if ((p0 = strchr(p, ')')) == NULL)
			return -1;

		size = p0 - p;
		if (size >= sizeof(s))
			return -1;

		/*
		 * Copy the text between the parentheses. A plain bounded copy
		 * avoids passing an unsigned value as the int precision of
		 * "%.*s"; size < sizeof(s) is guaranteed by the check above.
		 */
		memcpy(s, p, size);
		s[size] = '\0';

		if (rte_strsplit(s, sizeof(s), str_fld, NUM_FLD, ',') != NUM_FLD)
			return -1;
		for (i = 0; i < NUM_FLD; i++) {
			errno = 0;
			int_fld[i] = strtoul(str_fld[i], &end, 0);
			/* every field is stored in a uint8_t, hence the 255 limit */
			if (errno != 0 || end == str_fld[i] || int_fld[i] > 255)
				return -1;
		}
		if (nb_lcore_params >= MAX_LCORE_PARAMS) {
			printf("exceeded max number of lcore params: %hu\n",
				nb_lcore_params);
			return -1;
		}
		lcore_params_array[nb_lcore_params].port_id =
			(uint8_t)int_fld[FLD_PORT];
		lcore_params_array[nb_lcore_params].queue_id =
			(uint8_t)int_fld[FLD_QUEUE];
		lcore_params_array[nb_lcore_params].lcore_id =
			(uint8_t)int_fld[FLD_LCORE];
		++nb_lcore_params;
	}
	lcore_params = lcore_params_array;
	return 0;
}
/*
 * Parse the --eth-dest argument "X,MM:MM:MM:MM:MM:MM" and record the MAC as
 * the destination address of port X in dest_eth_addr[] and val_eth[].
 *
 * optarg: the option argument.
 * Any parse error or an out-of-range port terminates the application
 * through rte_exit().
 */
static void
parse_eth_dest(const char *optarg)
{
	unsigned long port;
	char *port_end;
	uint8_t c, *dest, peer_addr[6];

	errno = 0;
	port = strtoul(optarg, &port_end, 10);
	if (errno != 0 || port_end == optarg || *port_end++ != ',')
		rte_exit(EXIT_FAILURE,
			"Invalid eth-dest: %s", optarg);
	/*
	 * Range-check the full-width value: narrowing to uint8_t first would
	 * let e.g. "256,..." wrap around and silently configure port 0.
	 */
	if (port >= RTE_MAX_ETHPORTS)
		rte_exit(EXIT_FAILURE,
			"eth-dest: port %lu >= RTE_MAX_ETHPORTS(%d)\n",
			port, RTE_MAX_ETHPORTS);

	if (cmdline_parse_etheraddr(NULL, port_end,
			&peer_addr, sizeof(peer_addr)) < 0)
		rte_exit(EXIT_FAILURE,
			"Invalid ethernet address: %s\n",
			port_end);

	/* Store the 6 MAC bytes in the low bytes of the 64-bit slot. */
	dest = (uint8_t *)&dest_eth_addr[port];
	for (c = 0; c < 6; c++)
		dest[c] = peer_addr[c];
	/* Mirror the slot into the first 8 bytes of the port's val_eth entry. */
	*(uint64_t *)(val_eth + port) = dest_eth_addr[port];
}
/* Largest value accepted for --max-pkt-len. */
#define MAX_JUMBO_PKT_LEN 9600
/* Per-lcore cache size passed to rte_pktmbuf_pool_create(). */
#define MEMPOOL_CACHE_SIZE 256
/* Names of the long command-line options handled by parse_args(). */
#define CMD_LINE_OPT_CONFIG "config"
#define CMD_LINE_OPT_ETH_DEST "eth-dest"
#define CMD_LINE_OPT_NO_NUMA "no-numa"
#define CMD_LINE_OPT_IPV6 "ipv6"
#define CMD_LINE_OPT_ENABLE_JUMBO "enable-jumbo"
#define CMD_LINE_OPT_HASH_ENTRY_NUM "hash-entry-num"
#define CMD_LINE_OPT_PARSE_PTYPE "parse-ptype"
/*
 * This expression is used to calculate the number of mbufs needed
 * depending on user input, taking into account memory for rx and
 * tx hardware rings, cache per lcore and mtable per port per lcore.
 * RTE_MAX is used to ensure that NB_MBUF never goes below a minimum
 * value of 8192
 *
 * Note: the macro takes no arguments; it expands to an expression that
 * reads nb_ports, nb_rx_queue, nb_lcores and n_tx_queue from the scope
 * in which it is used (main() in this file).
 */
#define NB_MBUF RTE_MAX( \
(nb_ports*nb_rx_queue*RTE_TEST_RX_DESC_DEFAULT + \
nb_ports*nb_lcores*MAX_PKT_BURST + \
nb_ports*n_tx_queue*RTE_TEST_TX_DESC_DEFAULT + \
nb_lcores*MEMPOOL_CACHE_SIZE), \
(unsigned)8192)
/*
 * Parse the arguments given on the command line of the application
 * (everything after the EAL options).
 *
 * Short options: -p PORTMASK, -P (promiscuous), -E (exact match),
 * -L (longest prefix match). Long options are listed in lgopts[].
 *
 * argc/argv: argument vector as left by rte_eal_init().
 * Returns optind - 1 on success, -1 after printing a message on any error.
 * On return optind is reset to 0 so getopt can be used again.
 */
static int
parse_args(int argc, char **argv)
{
	int opt, ret;
	char **argvopt;
	int option_index;
	char *prgname = argv[0];
	static struct option lgopts[] = {
		{CMD_LINE_OPT_CONFIG, 1, 0, 0},
		{CMD_LINE_OPT_ETH_DEST, 1, 0, 0},
		{CMD_LINE_OPT_NO_NUMA, 0, 0, 0},
		{CMD_LINE_OPT_IPV6, 0, 0, 0},
		{CMD_LINE_OPT_ENABLE_JUMBO, 0, 0, 0},
		{CMD_LINE_OPT_HASH_ENTRY_NUM, 1, 0, 0},
		{CMD_LINE_OPT_PARSE_PTYPE, 0, 0, 0},
		{NULL, 0, 0, 0}
	};

	argvopt = argv;

	/* Error or normal output strings. */
	const char *str1 = "L3FWD: Invalid portmask";
	const char *str2 = "L3FWD: Promiscuous mode selected";
	const char *str3 = "L3FWD: Exact match selected";
	const char *str4 = "L3FWD: Longest-prefix match selected";
	const char *str5 = "L3FWD: Invalid config";
	const char *str6 = "L3FWD: NUMA is disabled";
	const char *str7 = "L3FWD: IPV6 is specified";
	const char *str8 =
		"L3FWD: Jumbo frame is enabled - disabling simple TX path";
	const char *str9 = "L3FWD: Invalid packet length";
	const char *str10 = "L3FWD: Set jumbo frame max packet len to ";
	const char *str11 = "L3FWD: Invalid hash entry number";
	const char *str12 =
		"L3FWD: LPM and EM are mutually exclusive, select only one";
	const char *str13 = "L3FWD: LPM or EM none selected, default LPM on";

	while ((opt = getopt_long(argc, argvopt, "p:PLE",
				lgopts, &option_index)) != EOF) {

		switch (opt) {
		/* portmask */
		case 'p':
			enabled_port_mask = parse_portmask(optarg);
			if (enabled_port_mask == 0) {
				printf("%s\n", str1);
				print_usage(prgname);
				return -1;
			}
			break;

		case 'P':
			printf("%s\n", str2);
			promiscuous_on = 1;
			break;

		case 'E':
			printf("%s\n", str3);
			l3fwd_em_on = 1;
			break;

		case 'L':
			printf("%s\n", str4);
			l3fwd_lpm_on = 1;
			break;

		/* long options */
		case 0:
			/*
			 * The sizeof() of a string literal includes the NUL,
			 * so each strncmp below is an exact-name comparison.
			 */
			if (!strncmp(lgopts[option_index].name,
					CMD_LINE_OPT_CONFIG,
					sizeof(CMD_LINE_OPT_CONFIG))) {
				ret = parse_config(optarg);
				if (ret) {
					printf("%s\n", str5);
					print_usage(prgname);
					return -1;
				}
			}

			if (!strncmp(lgopts[option_index].name,
					CMD_LINE_OPT_ETH_DEST,
					sizeof(CMD_LINE_OPT_ETH_DEST))) {
				parse_eth_dest(optarg);
			}

			if (!strncmp(lgopts[option_index].name,
					CMD_LINE_OPT_NO_NUMA,
					sizeof(CMD_LINE_OPT_NO_NUMA))) {
				printf("%s\n", str6);
				numa_on = 0;
			}

			if (!strncmp(lgopts[option_index].name,
					CMD_LINE_OPT_IPV6,
					sizeof(CMD_LINE_OPT_IPV6))) {
				/* was "%sn": printed a literal 'n', no newline */
				printf("%s\n", str7);
				ipv6 = 1;
			}

			if (!strncmp(lgopts[option_index].name,
					CMD_LINE_OPT_ENABLE_JUMBO,
					sizeof(CMD_LINE_OPT_ENABLE_JUMBO))) {
				struct option lenopts = {
					"max-pkt-len", required_argument, 0, 0
				};

				printf("%s\n", str8);
				port_conf.rxmode.jumbo_frame = 1;

				/*
				 * if no max-pkt-len set, use the default
				 * value ETHER_MAX_LEN.
				 */
				if (0 == getopt_long(argc, argvopt, "",
						&lenopts, &option_index)) {
					ret = parse_max_pkt_len(optarg);
					if ((ret < 64) ||
						(ret > MAX_JUMBO_PKT_LEN)) {
						printf("%s\n", str9);
						print_usage(prgname);
						return -1;
					}
					port_conf.rxmode.max_rx_pkt_len = ret;
				}
				printf("%s %u\n", str10,
				(unsigned int)port_conf.rxmode.max_rx_pkt_len);
			}

			if (!strncmp(lgopts[option_index].name,
					CMD_LINE_OPT_HASH_ENTRY_NUM,
					sizeof(CMD_LINE_OPT_HASH_ENTRY_NUM))) {
				ret = parse_hash_entry_number(optarg);
				if ((ret > 0) && (ret <= L3FWD_HASH_ENTRIES)) {
					hash_entry_number = ret;
				} else {
					printf("%s\n", str11);
					print_usage(prgname);
					return -1;
				}
			}

			if (!strncmp(lgopts[option_index].name,
					CMD_LINE_OPT_PARSE_PTYPE,
					sizeof(CMD_LINE_OPT_PARSE_PTYPE))) {
				printf("soft parse-ptype is enabled\n");
				parse_ptype = 1;
			}

			break;

		default:
			print_usage(prgname);
			return -1;
		}
	}

	/* If both LPM and EM are selected, return error. */
	if (l3fwd_lpm_on && l3fwd_em_on) {
		printf("%s\n", str12);
		return -1;
	}

	/*
	 * Nothing is selected, pick longest-prefix match
	 * as default match.
	 */
	if (!l3fwd_lpm_on && !l3fwd_em_on) {
		l3fwd_lpm_on = 1;
		printf("%s\n", str13);
	}

	/*
	 * ipv6 and hash flags are valid only for
	 * exact match, reset them to default for
	 * longest-prefix match.
	 */
	if (l3fwd_lpm_on) {
		ipv6 = 0;
		hash_entry_number = HASH_ENTRY_NUMBER_DEFAULT;
	}

	/*
	 * Put the program name in front of the remaining arguments.
	 * Require optind >= 1 so that argv[optind - 1] can never be argv[-1].
	 */
	if (optind >= 1)
		argv[optind-1] = prgname;

	ret = optind-1;
	optind = 0; /* reset getopt lib */
	return ret;
}
/*
 * Print `name` immediately followed by the textual form of `eth_addr`
 * (as produced by ether_format_addr) on stdout, without a newline.
 */
static void
print_ethaddr(const char *name, const struct ether_addr *eth_addr)
{
	char text[ETHER_ADDR_FMT_SIZE];

	ether_format_addr(text, sizeof(text), eth_addr);
	printf("%s%s", name, text);
}
/*
 * For every enabled lcore: make sure its socket has an mbuf pool (creating
 * the pool and running the lookup method's setup hook the first time the
 * socket is seen) and store the socket's IPv4/IPv6 lookup structures in the
 * lcore's configuration.
 *
 * nb_mbuf: number of mbufs for each newly created pool.
 * Returns 0; unrecoverable problems terminate through rte_exit().
 */
static int
init_mem(unsigned nb_mbuf)
{
	unsigned core;
	char pool_name[64];

	for (core = 0; core < RTE_MAX_LCORE; core++) {
		struct lcore_conf *conf;
		int socketid = 0;

		if (!rte_lcore_is_enabled(core))
			continue;

		/* With NUMA off everything is placed on socket 0. */
		if (numa_on)
			socketid = rte_lcore_to_socket_id(core);

		if (socketid >= NB_SOCKETS) {
			rte_exit(EXIT_FAILURE,
				"Socket %d of lcore %u is out of range %d\n",
				socketid, core, NB_SOCKETS);
		}

		if (pktmbuf_pool[socketid] == NULL) {
			snprintf(pool_name, sizeof(pool_name), "mbuf_pool_%d", socketid);
			pktmbuf_pool[socketid] =
				rte_pktmbuf_pool_create(pool_name, nb_mbuf,
					MEMPOOL_CACHE_SIZE, 0,
					RTE_MBUF_DEFAULT_BUF_SIZE, socketid);
			if (pktmbuf_pool[socketid] == NULL)
				rte_exit(EXIT_FAILURE,
					"Cannot init mbuf pool on socket %d\n",
					socketid);
			printf("Allocated mbuf pool on socket %d\n",
				socketid);

			/* Setup either LPM or EM(f.e Hash). */
			l3fwd_lkp.setup(socketid);
		}

		conf = &lcore_conf[core];
		conf->ipv4_lookup_struct =
			l3fwd_lkp.get_ipv4_lookup_struct(socketid);
		conf->ipv6_lookup_struct =
			l3fwd_lkp.get_ipv6_lookup_struct(socketid);
	}
	return 0;
}
/*
 * Check the link status of all ports in up to 9s, and print them finally.
 *
 * port_num:  number of ports to scan (ids 0 .. port_num - 1).
 * port_mask: bit N set => port N is checked.
 *
 * The loop polls every CHECK_INTERVAL ms until all selected ports are up or
 * the time budget is used, then runs one extra pass with print_flag set that
 * prints the state of each port. Returns early if force_quit becomes set.
 */
static void
check_all_ports_link_status(uint8_t port_num, uint32_t port_mask)
{
#define CHECK_INTERVAL 100 /* 100ms */
#define MAX_CHECK_TIME 90 /* 9s (90 * 100ms) in total */
	uint8_t portid, count, all_ports_up, print_flag = 0;
	struct rte_eth_link link;

	printf("\nChecking link status");
	fflush(stdout);
	for (count = 0; count <= MAX_CHECK_TIME; count++) {
		if (force_quit)
			return;
		all_ports_up = 1;
		for (portid = 0; portid < port_num; portid++) {
			if (force_quit)
				return;
			/* unsigned constant: the mask is an unsigned 32-bit value */
			if ((port_mask & (1u << portid)) == 0)
				continue;
			memset(&link, 0, sizeof(link));
			rte_eth_link_get_nowait(portid, &link);
			/* print link status if flag set */
			if (print_flag == 1) {
				if (link.link_status)
					/*
					 * The format already ends the line; the
					 * duplex text must not carry its own
					 * newline (half-duplex used to print a
					 * blank line).
					 */
					printf("Port %d Link Up - speed %u "
						"Mbps - %s\n", (uint8_t)portid,
						(unsigned)link.link_speed,
				(link.link_duplex == ETH_LINK_FULL_DUPLEX) ?
					("full-duplex") : ("half-duplex"));
				else
					printf("Port %d Link Down\n",
						(uint8_t)portid);
				continue;
			}
			/* clear all_ports_up flag if any link down */
			if (link.link_status == ETH_LINK_DOWN) {
				all_ports_up = 0;
				break;
			}
		}
		/* after finally printing all link status, get out */
		if (print_flag == 1)
			break;

		if (all_ports_up == 0) {
			printf(".");
			fflush(stdout);
			rte_delay_ms(CHECK_INTERVAL);
		}

		/* set the print_flag if all ports up or timeout */
		if (all_ports_up == 1 || count == (MAX_CHECK_TIME - 1)) {
			print_flag = 1;
			printf("done\n");
		}
	}
}
/*
 * Signal handler installed for SIGINT and SIGTERM: announces the signal and
 * raises force_quit. Any other signal number is ignored.
 *
 * NOTE(review): printf() is not on the POSIX async-signal-safe list;
 * behavior is kept as in the original.
 */
static void
signal_handler(int signum)
{
	switch (signum) {
	case SIGINT:
	case SIGTERM:
		printf("\n\nSignal %d received, preparing to exit...\n",
			signum);
		force_quit = true;
		break;
	default:
		break;
	}
}
/*
 * Make sure packet-type information will be available for one RX queue.
 *
 * With --parse-ptype the lookup method's software parser is registered as an
 * RX callback on (portid, queueid); otherwise the lookup method's
 * check_ptype hook is asked about the port.
 *
 * Returns 1 when packet types can be obtained, 0 (after printing a message)
 * otherwise.
 */
static int
prepare_ptype_parser(uint8_t portid, uint16_t queueid)
{
	if (!parse_ptype) {
		if (l3fwd_lkp.check_ptype(portid))
			return 1;

		printf("port %d cannot parse packet type, please add --%s\n",
			portid, CMD_LINE_OPT_PARSE_PTYPE);
		return 0;
	}

	printf("Port %d: softly parse packet type info\n", portid);
	/* A non-NULL result from the registration is treated as success. */
	if (rte_eth_add_rx_callback(portid, queueid,
			l3fwd_lkp.cb_parse_ptype, NULL) != NULL)
		return 1;

	printf("Failed to add rx callback: port=%d\n", portid);
	return 0;
}
/*
 * Application entry point.
 *
 * Sequence: register the process in the cpuset cgroup task list, initialise
 * the EAL, install signal handlers, preset destination MACs, parse the
 * application options, validate the lcore/port configuration, configure every
 * enabled port with its TX and RX queues, start the ports, prepare packet
 * type parsing, wait for links, run the selected main loop on every lcore
 * and finally stop and close the ports.
 *
 * Returns 0 on a clean shutdown, -1 if waiting on a slave lcore reported an
 * error. Setup failures terminate through rte_exit().
 */
int
main(int argc, char **argv)
{
/*
 * Best effort: append our pid to the cpuset cgroup task file. Nothing is
 * done when the file cannot be opened.
 * NOTE(review): getpid() is declared in <unistd.h>, which this file does not
 * include directly — confirm it arrives through another header.
 */
char* name = "/sys/fs/cgroup/cpuset/tasks";
FILE* globaltask = NULL;
globaltask = fopen(name,"a");
int pid = getpid();
if(globaltask) {
fprintf(globaltask,"%d\n",pid);
fclose(globaltask);
}
struct lcore_conf *qconf;
struct rte_eth_dev_info dev_info;
struct rte_eth_txconf *txconf;
int ret;
unsigned nb_ports;
uint16_t queueid;
unsigned lcore_id;
uint32_t n_tx_queue, nb_lcores;
uint8_t portid, nb_rx_queue, queue, socketid;
/* init EAL */
ret = rte_eal_init(argc, argv);
if (ret < 0)
rte_exit(EXIT_FAILURE, "Invalid EAL parameters\n");
/* skip the arguments consumed by the EAL */
argc -= ret;
argv += ret;
force_quit = false;
signal(SIGINT, signal_handler);
signal(SIGTERM, signal_handler);
/* pre-init dst MACs for all ports to 02:00:00:00:00:xx */
for (portid = 0; portid < RTE_MAX_ETHPORTS; portid++) {
dest_eth_addr[portid] =
ETHER_LOCAL_ADMIN_ADDR + ((uint64_t)portid << 40);
*(uint64_t *)(val_eth + portid) = dest_eth_addr[portid];
}
// Hard-code the destination MAC of ports 0 and 1 (overrides the defaults
// set just above; --eth-dest is parsed later and can still replace them).
// The first MAC byte is placed in the least significant byte of the word.
// 0:00:e0:4c:0b:01:f4
// 1:00:e0:4c:0a:fe:70
dest_eth_addr[0] = (0xf40000000000 /*<< 40*/) + (0x0100000000/* << 32*/) + (0x0b << 24) + (0x4c << 16) + (0xe0 << 8) + (0x00 << 0);
dest_eth_addr[1] = (0x700000000000/* << 40*/) + (0xfe00000000/* << 32*/) + (0x0a << 24) + (0x4c << 16) + (0xe0 << 8) + (0x00 << 0);
*(uint64_t *)(val_eth + 0) = dest_eth_addr[0];
*(uint64_t *)(val_eth + 1) = dest_eth_addr[1];
/* parse application arguments (after the EAL ones) */
ret = parse_args(argc, argv);
if (ret < 0)
rte_exit(EXIT_FAILURE, "Invalid L3FWD parameters\n");
if (check_lcore_params() < 0)
rte_exit(EXIT_FAILURE, "check_lcore_params failed\n");
ret = init_lcore_rx_queues();
if (ret < 0)
rte_exit(EXIT_FAILURE, "init_lcore_rx_queues failed\n");
nb_ports = rte_eth_dev_count();
if (nb_ports > RTE_MAX_ETHPORTS)
nb_ports = RTE_MAX_ETHPORTS;
if (check_port_config(nb_ports) < 0)
rte_exit(EXIT_FAILURE, "check_port_config failed\n");
nb_lcores = rte_lcore_count();
/* Setup function pointers for lookup method. */
setup_l3fwd_lookup_tables();
/* initialize all ports */
for (portid = 0; portid < nb_ports; portid++) {
/* skip ports that are not enabled */
if ((enabled_port_mask & (1 << portid)) == 0) {
printf("\nSkipping disabled port %d\n", portid);
continue;
}
/* init port */
printf("Initializing port %d ... ", portid );
fflush(stdout);
nb_rx_queue = get_port_n_rx_queues(portid);
/* one TX queue per lcore, capped at MAX_TX_QUEUE_PER_PORT */
n_tx_queue = nb_lcores;
if (n_tx_queue > MAX_TX_QUEUE_PER_PORT)
n_tx_queue = MAX_TX_QUEUE_PER_PORT;
printf("Creating queues: nb_rxq=%d nb_txq=%u... ",
nb_rx_queue, (unsigned)n_tx_queue );
ret = rte_eth_dev_configure(portid, nb_rx_queue,
(uint16_t)n_tx_queue, &port_conf);
if (ret < 0)
rte_exit(EXIT_FAILURE,
"Cannot configure device: err=%d, port=%d\n",
ret, portid);
rte_eth_macaddr_get(portid, &ports_eth_addr[portid]);
print_ethaddr(" Address:", &ports_eth_addr[portid]);
printf(", ");
print_ethaddr("Destination:",
(const struct ether_addr *)&dest_eth_addr[portid]);
printf(", ");
/*
 * prepare src MACs for each port.
 * (written at byte offset 6 of val_eth[portid], right after the
 * 6-byte destination MAC)
 */
ether_addr_copy(&ports_eth_addr[portid],
(struct ether_addr *)(val_eth + portid) + 1);
/* init memory */
/* NB_MBUF reads nb_ports, nb_rx_queue, nb_lcores and n_tx_queue from this scope */
ret = init_mem(NB_MBUF);
if (ret < 0)
rte_exit(EXIT_FAILURE, "init_mem failed\n");
/* init one TX queue per couple (lcore,port) */
/* Every port gets one TX queue on every enabled lcore.
Example as (lcore, port_id, queue_id): with one port and two lcores the
queues are (0,0,0),(1,0,1)
*/
queueid = 0;
for (lcore_id = 0; lcore_id < RTE_MAX_LCORE; lcore_id++) {
if (rte_lcore_is_enabled(lcore_id) == 0)
continue;
if (numa_on)
socketid =
(uint8_t)rte_lcore_to_socket_id(lcore_id);
else
socketid = 0;
printf("txq=%u,%d,%d ", lcore_id, queueid, socketid);
fflush(stdout);
rte_eth_dev_info_get(portid, &dev_info);
txconf = &dev_info.default_txconf;
/* with jumbo frames, clear the default TX queue flags */
if (port_conf.rxmode.jumbo_frame)
txconf->txq_flags = 0;
ret = rte_eth_tx_queue_setup(portid, queueid, nb_txd,
socketid, txconf);
if (ret < 0)
rte_exit(EXIT_FAILURE,
"rte_eth_tx_queue_setup: err=%d, "
"port=%d\n", ret, portid);
/* remember which TX queue of this port belongs to this lcore */
qconf = &lcore_conf[lcore_id];
qconf->tx_queue_id[portid] = queueid;
queueid++;
qconf->tx_port_id[qconf->n_tx_port] = portid;
qconf->n_tx_port++;
}
printf("\n");
}
for (lcore_id = 0; lcore_id < RTE_MAX_LCORE; lcore_id++) {
if (rte_lcore_is_enabled(lcore_id) == 0)
continue;
qconf = &lcore_conf[lcore_id];
printf("\nInitializing rx queues on lcore %u ... ", lcore_id );
fflush(stdout);
/* init RX queues */
for(queue = 0; queue < qconf->n_rx_queue; ++queue) {
portid = qconf->rx_queue_list[queue].port_id;
queueid = qconf->rx_queue_list[queue].queue_id;
if (numa_on)
socketid =
(uint8_t)rte_lcore_to_socket_id(lcore_id);
else
socketid = 0;
printf("rxq=%d,%d,%d ", portid, queueid, socketid);
fflush(stdout);
/* NULL RX configuration; mbufs come from the polling lcore's socket pool */
ret = rte_eth_rx_queue_setup(portid, queueid, nb_rxd,
socketid,
NULL,
pktmbuf_pool[socketid]);
if (ret < 0)
rte_exit(EXIT_FAILURE,
"rte_eth_rx_queue_setup: err=%d, port=%d\n",
ret, portid);
}
}
printf("\n");
/* start ports */
for (portid = 0; portid < nb_ports; portid++) {
if ((enabled_port_mask & (1 << portid)) == 0) {
continue;
}
/* Start device */
ret = rte_eth_dev_start(portid);
if (ret < 0)
rte_exit(EXIT_FAILURE,
"rte_eth_dev_start: err=%d, port=%d\n",
ret, portid);
/*
* If enabled, put device in promiscuous mode.
* This allows IO forwarding mode to forward packets
* to itself through 2 cross-connected ports of the
* target machine.
*/
if (promiscuous_on)
rte_eth_promiscuous_enable(portid);
}
printf("\n");
/* make sure packet types are available on every configured RX queue */
for (lcore_id = 0; lcore_id < RTE_MAX_LCORE; lcore_id++) {
if (rte_lcore_is_enabled(lcore_id) == 0)
continue;
qconf = &lcore_conf[lcore_id];
for (queue = 0; queue < qconf->n_rx_queue; ++queue) {
portid = qconf->rx_queue_list[queue].port_id;
queueid = qconf->rx_queue_list[queue].queue_id;
if (prepare_ptype_parser(portid, queueid) == 0)
rte_exit(EXIT_FAILURE, "ptype check fails\n");
}
}
check_all_ports_link_status((uint8_t)nb_ports, enabled_port_mask);
ret = 0;
/* launch per-lcore init on every lcore */
rte_eal_mp_remote_launch(l3fwd_lkp.main_loop, NULL, CALL_MASTER);
/* wait for the slave lcores; the first failure ends the wait */
RTE_LCORE_FOREACH_SLAVE(lcore_id) {
if (rte_eal_wait_lcore(lcore_id) < 0) {
ret = -1;
break;
}
}
/* stop ports */
for (portid = 0; portid < nb_ports; portid++) {
if ((enabled_port_mask & (1 << portid)) == 0)
continue;
printf("Closing port %d...", portid);
rte_eth_dev_stop(portid);
rte_eth_dev_close(portid);
printf(" Done\n");
}
printf("Bye...\n");
return ret;
}
l3fwd.h
/*-
* BSD LICENSE
*
* Copyright(c) 2010-2016 Intel Corporation. All rights reserved.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
*
* * Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in
* the documentation and/or other materials provided with the
* distribution.
* * Neither the name of Intel Corporation nor the names of its
* contributors may be used to endorse or promote products derived
* from this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
* OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef __L3_FWD_H__
#define __L3_FWD_H__
#include <rte_vect.h>
/* Compile in is_valid_ipv4_pkt() (guarded by this macro further down). */
#define DO_RFC_1812_CHECKS
/* Log type used by this application. */
#define RTE_LOGTYPE_L3FWD RTE_LOGTYPE_USER1
/* Define NO_HASH_MULTI_LOOKUP on NEON builds unless it is already defined. */
#if !defined(NO_HASH_MULTI_LOOKUP) && defined(RTE_MACHINE_CPUFLAG_NEON)
#define NO_HASH_MULTI_LOOKUP 1
#endif
/* Size of the per-port TX staging table (struct mbuf_table). */
#define MAX_PKT_BURST 32
#define BURST_TX_DRAIN_US 100 /* TX drain every ~100us */
/* Capacity of lcore_conf.rx_queue_list. */
#define MAX_RX_QUEUE_PER_LCORE 16
/*
* Try to avoid TX buffering if we have at least MAX_TX_BURST packets to send.
*/
#define MAX_TX_BURST (MAX_PKT_BURST / 2)
/* Number of per-socket slots (e.g. the mbuf pool array in main.c). */
#define NB_SOCKETS 8
/* Configure how many packets ahead to prefetch, when reading packets */
#define PREFETCH_OFFSET 3
/* Used to mark destination port as 'invalid'. */
#define BAD_PORT ((uint16_t)-1)
#define FWDSTEP 4
/* replace first 12B of the ethernet header. */
#define MASK_ETH 0x3f
/* Hash parameters. */
#ifdef RTE_ARCH_X86_64
/* default to 4 million hash entries (approx) */
#define L3FWD_HASH_ENTRIES (1024*1024*4)
#else
/* 32-bit has less address-space for hugepage memory, limit to 1M entries */
#define L3FWD_HASH_ENTRIES (1024*1024*1)
#endif
/* Initial value of hash_entry_number (changed by --hash-entry-num). */
#define HASH_ENTRY_NUMBER_DEFAULT 4
/* Staging buffer of packets waiting to be transmitted on one port. */
struct mbuf_table {
/* number of valid entries in m_table */
uint16_t len;
struct rte_mbuf *m_table[MAX_PKT_BURST];
};
/* One RX queue (identified by port id and queue id) assigned to an lcore. */
struct lcore_rx_queue {
uint8_t port_id;
uint8_t queue_id;
} __rte_cache_aligned;
/* Per-lcore forwarding state; one instance per lcore id in lcore_conf[]. */
struct lcore_conf {
/* number of valid entries in rx_queue_list */
uint16_t n_rx_queue;
struct lcore_rx_queue rx_queue_list[MAX_RX_QUEUE_PER_LCORE];
/* number of valid entries in tx_port_id */
uint16_t n_tx_port;
uint16_t tx_port_id[RTE_MAX_ETHPORTS]; // list of ports this lcore transmits on
uint16_t tx_queue_id[RTE_MAX_ETHPORTS]; // TX queue id this lcore uses on each port, indexed by port id
struct mbuf_table tx_mbufs[RTE_MAX_ETHPORTS]; // per-port TX staging buffer, indexed by port id
/* lookup structures of the lcore's socket, set by init_mem() in main.c */
void *ipv4_lookup_struct;
void *ipv6_lookup_struct;
} __rte_cache_aligned;
/* Objects declared here are defined in main.c. */
/* Termination request flag. */
extern volatile bool force_quit;
/* ethernet addresses of ports */
/* destination MAC per port, held in a 64-bit word */
extern uint64_t dest_eth_addr[RTE_MAX_ETHPORTS];
/* MAC address of each port */
extern struct ether_addr ports_eth_addr[RTE_MAX_ETHPORTS];
/* mask of enabled ports */
extern uint32_t enabled_port_mask;
/* Used only in exact match mode. */
extern int ipv6; /**< ipv6 is false by default. */
extern uint32_t hash_entry_number;
/* per-port vector holding the destination MAC followed by the source MAC */
extern xmm_t val_eth[RTE_MAX_ETHPORTS];
/* per-lcore configuration, indexed by lcore id */
extern struct lcore_conf lcore_conf[RTE_MAX_LCORE];
/*
 * Send burst of packets on an output interface.
 *
 * Transmits the first `n` packets staged for `port` in qconf->tx_mbufs on the
 * TX queue this lcore owns for that port; packets the driver did not accept
 * are freed. Always returns 0.
 */
static inline int
send_burst(struct lcore_conf *qconf, uint16_t n, uint8_t port)
{
	struct rte_mbuf **pkts =
		(struct rte_mbuf **)qconf->tx_mbufs[port].m_table;
	const uint16_t txq = qconf->tx_queue_id[port];
	int sent = rte_eth_tx_burst(port, txq, pkts, n);

	if (unlikely(sent < n)) {
		/* drop whatever could not be queued */
		while (sent < n)
			rte_pktmbuf_free(pkts[sent++]);
	}
	return 0;
}
/* Enqueue a single packet, and send burst if queue is filled */
static inline int
send_single_packet(struct lcore_conf *qconf,
struct rte_mbuf *m, uint8_t port)
{
uint16_t len;
len = qconf->tx_mbufs[port].len;
qconf->tx_mbufs[port].m_table[len] = m;
len++;
/* enough pkts to be sent */
if (unlikely(len == MAX_PKT_BURST)) {
send_burst(qconf, MAX_PKT_BURST, port);
len = 0;
}
qconf->tx_mbufs[port].len = len;
return 0;
}
#ifdef DO_RFC_1812_CHECKS
/*
 * Sanity-check an IPv4 header following RFC 1812 section 5.2.2.
 *
 * pkt:      pointer to the IPv4 header.
 * link_len: packet length reported by the link layer.
 *
 * Returns 0 when the header passes, otherwise a negative number naming the
 * failed check: -1 (link length), -3 (version), -4 (header length field),
 * -5 (total length). Check 2, the checksum, is left to hardware.
 */
static inline int
is_valid_ipv4_pkt(struct ipv4_hdr *pkt, uint32_t link_len)
{
	uint8_t ihl_words;

	/* From http://www.rfc-editor.org/rfc/rfc1812.txt section 5.2.2 */
	/*
	 * 1. The packet length reported by the Link Layer must be large
	 * enough to hold the minimum length legal IP datagram (20 bytes).
	 */
	if (link_len < sizeof(struct ipv4_hdr))
		return -1;

	/* 2. The IP checksum must be correct. */
	/* this is checked in H/W */

	/*
	 * 3. The IP version number must be 4. If the version number is not 4
	 * then the packet may be another version of IP, such as IPng or
	 * ST-II.
	 */
	if (((pkt->version_ihl) >> 4) != 4)
		return -3;

	/*
	 * 4. The IP header length field must be large enough to hold the
	 * minimum length legal IP datagram (20 bytes = 5 words).
	 */
	ihl_words = pkt->version_ihl & 0xf;
	if (ihl_words < 5)
		return -4;

	/*
	 * 5. The IP total length field must be large enough to hold the IP
	 * datagram header, whose length is specified in the IP header length
	 * field.
	 *
	 * Compare against the header length taken from the IHL field
	 * (in 32-bit words, hence * 4) rather than the fixed minimum, so a
	 * header with options is checked against its real size.
	 * total_length is in network byte order; the 16-bit swap is its own
	 * inverse, so rte_cpu_to_be_16 also converts it to host order.
	 */
	if (rte_cpu_to_be_16(pkt->total_length) < (uint16_t)(ihl_words * 4))
		return -5;

	return 0;
}
#endif /* DO_RFC_1812_CHECKS */
/*
 * Prototypes of the LPM (lpm_*) and exact-match (em_*) back ends.
 * NOTE(review): main.c presumably picks one set through function pointers
 * depending on -L / -E -- confirm.
 */
/* Create and populate the lookup tables for one socket. */
void
setup_lpm(const int socketid);
void
setup_hash(const int socketid);
/*
 * Query the packet types a port can classify. lpm_check_ptype() returns 1
 * only when both IPv4 and IPv6 L3 types are reported, 0 otherwise.
 */
int
em_check_ptype(int portid);
int
lpm_check_ptype(int portid);
/*
 * Software packet-type parsers with an RX-callback style signature; the LPM
 * variant sets packet_type on each mbuf and returns nb_pkts.
 */
uint16_t
em_cb_parse_ptype(uint8_t port, uint16_t queue, struct rte_mbuf *pkts[],
uint16_t nb_pkts, uint16_t max_pkts, void *user_param);
uint16_t
lpm_cb_parse_ptype(uint8_t port, uint16_t queue, struct rte_mbuf *pkts[],
uint16_t nb_pkts, uint16_t max_pkts, void *user_param);
/* Per-lcore forwarding loops; the argument is unused. */
int
em_main_loop(__attribute__((unused)) void *dummy);
int
lpm_main_loop(__attribute__((unused)) void *dummy);
/* Return ipv4/ipv6 fwd lookup struct for LPM or EM. */
void *
em_get_ipv4_l3fwd_lookup_struct(const int socketid);
void *
em_get_ipv6_l3fwd_lookup_struct(const int socketid);
void *
lpm_get_ipv4_l3fwd_lookup_struct(const int socketid);
void *
lpm_get_ipv6_l3fwd_lookup_struct(const int socketid);
#endif /* __L3_FWD_H__ */
l3fwd_lpm.c
/*-
* BSD LICENSE
*
* Copyright(c) 2010-2016 Intel Corporation. All rights reserved.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
*
* * Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in
* the documentation and/or other materials provided with the
* distribution.
* * Neither the name of Intel Corporation nor the names of its
* contributors may be used to endorse or promote products derived
* from this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
* OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#include <stdio.h>
#include <stdlib.h>
#include <stdint.h>
#include <inttypes.h>
#include <sys/types.h>
#include <string.h>
#include <sys/queue.h>
#include <stdarg.h>
#include <errno.h>
#include <getopt.h>
#include <stdbool.h>
#include <rte_debug.h>
#include <rte_ether.h>
#include <rte_ethdev.h>
#include <rte_ring.h>
#include <rte_mempool.h>
#include <rte_cycles.h>
#include <rte_mbuf.h>
#include <rte_ip.h>
#include <rte_tcp.h>
#include <rte_udp.h>
#include <rte_lpm.h>
#include <rte_lpm6.h>
#include "l3fwd.h"
/* One static IPv4 route as loaded into the LPM table by setup_lpm(). */
struct ipv4_l3fwd_lpm_route {
uint32_t ip; /* network prefix, built with the IPv4() macro */
uint8_t depth; /* prefix length passed to rte_lpm_add() */
uint8_t if_out; /* output port id, stored as the route's next hop */
};
/* One static IPv6 route as loaded into the LPM6 table by setup_lpm(). */
struct ipv6_l3fwd_lpm_route {
uint8_t ip[16]; /* 128-bit address, one byte per element */
uint8_t depth; /* prefix length passed to rte_lpm6_add() */
uint8_t if_out; /* output port id, stored as the route's next hop */
};
/*
 * Static IPv4 routing table: {prefix, prefix length, output port}.
 * setup_lpm() installs only the entries whose output port is enabled.
 * The 88.88.88.0/24 and 99.99.99.0/24 entries were added by the article's
 * author for the routing test; the remaining eight are the stock routes.
 */
static struct ipv4_l3fwd_lpm_route ipv4_l3fwd_lpm_route_array[] = {
{IPv4(88, 88, 88, 0), 24, 0},
{IPv4(99, 99, 99, 0), 24, 1},
{IPv4(1, 1, 1, 0), 24, 0},
{IPv4(2, 1, 1, 0), 24, 1},
{IPv4(3, 1, 1, 0), 24, 2},
{IPv4(4, 1, 1, 0), 24, 3},
{IPv4(5, 1, 1, 0), 24, 4},
{IPv4(6, 1, 1, 0), 24, 5},
{IPv4(7, 1, 1, 0), 24, 6},
{IPv4(8, 1, 1, 0), 24, 7},
};
/*
 * Static IPv6 routing table: {16-byte address, prefix length, output port}.
 * All entries use a /48 prefix; setup_lpm() installs only those whose output
 * port is enabled.
 */
static struct ipv6_l3fwd_lpm_route ipv6_l3fwd_lpm_route_array[] = {
{{1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1}, 48, 0},
{{2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1}, 48, 1},
{{3, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1}, 48, 2},
{{4, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1}, 48, 3},
{{5, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1}, 48, 4},
{{6, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1}, 48, 5},
{{7, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1}, 48, 6},
{{8, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1}, 48, 7},
};
/* Element counts of the static IPv4 / IPv6 route arrays. */
#define IPV4_L3FWD_LPM_NUM_ROUTES \
(sizeof(ipv4_l3fwd_lpm_route_array) / sizeof(ipv4_l3fwd_lpm_route_array[0]))
#define IPV6_L3FWD_LPM_NUM_ROUTES \
(sizeof(ipv6_l3fwd_lpm_route_array) / sizeof(ipv6_l3fwd_lpm_route_array[0]))
/* Table sizing handed to rte_lpm_create() / rte_lpm6_create() as max_rules and number_tbl8s. */
#define IPV4_L3FWD_LPM_MAX_RULES 1024
#define IPV4_L3FWD_LPM_NUMBER_TBL8S (1 << 8)
#define IPV6_L3FWD_LPM_MAX_RULES 1024
#define IPV6_L3FWD_LPM_NUMBER_TBL8S (1 << 16)
/* LPM / LPM6 tables, one per socket and indexed by socket id; created and filled by setup_lpm(). */
struct rte_lpm *ipv4_l3fwd_lpm_lookup_struct[NB_SOCKETS];
struct rte_lpm6 *ipv6_l3fwd_lpm_lookup_struct[NB_SOCKETS];
#if defined(__SSE4_1__)
#include "l3fwd_lpm_sse.h"
#else
#include "l3fwd_lpm.h"
#endif
/* main processing loop */
int
lpm_main_loop(__attribute__((unused)) void *dummy)
{
struct rte_mbuf *pkts_burst[MAX_PKT_BURST];
unsigned lcore_id;
uint64_t prev_tsc, diff_tsc, cur_tsc;
int i, nb_rx;
uint8_t portid, queueid;
struct lcore_conf *qconf;
const uint64_t drain_tsc = (rte_get_tsc_hz() + US_PER_S - 1) /
US_PER_S * BURST_TX_DRAIN_US;
prev_tsc = 0;
lcore_id = rte_lcore_id();
qconf = &lcore_conf[lcore_id];
if (qconf->n_rx_queue == 0) {
RTE_LOG(INFO, L3FWD, "lcore %u has nothing to do\n", lcore_id);
return 0;
}
RTE_LOG(INFO, L3FWD, "entering main loop on lcore %u\n", lcore_id);
for (i = 0; i < qconf->n_rx_queue; i++) {
portid = qconf->rx_queue_list[i].port_id;
queueid = qconf->rx_queue_list[i].queue_id;
RTE_LOG(INFO, L3FWD,
" -- lcoreid=%u portid=%hhu rxqueueid=%hhu\n",
lcore_id, portid, queueid);
}
while (!force_quit) {
cur_tsc = rte_rdtsc();
/*
* TX burst queue drain
*/
diff_tsc = cur_tsc - prev_tsc;
if (unlikely(diff_tsc > drain_tsc)) {
// 每个逻辑核为每个接口初始化一个发送队列
for (i = 0; i < qconf->n_tx_port; ++i) {
portid = qconf->tx_port_id[i];
if (qconf->tx_mbufs[portid].len == 0)
continue;
send_burst(qconf,
qconf->tx_mbufs[portid].len,
portid);
qconf->tx_mbufs[portid].len = 0;
}
prev_tsc = cur_tsc;
}
/*
* Read packet from RX queues
*/
for (i = 0; i < qconf->n_rx_queue; ++i) {
portid = qconf->rx_queue_list[i].port_id;
queueid = qconf->rx_queue_list[i].queue_id;
nb_rx = rte_eth_rx_burst(portid, queueid, pkts_burst,
MAX_PKT_BURST);
if (nb_rx == 0)
continue;
#if defined(__SSE4_1__)
l3fwd_lpm_send_packets(nb_rx, pkts_burst,
portid, qconf);
#else
l3fwd_lpm_no_opt_send_packets(nb_rx, pkts_burst,
portid, qconf);
#endif /* __SSE_4_1__ */
}
}
return 0;
}
void
setup_lpm(const int socketid)
{
struct rte_lpm6_config config;
struct rte_lpm_config config_ipv4;
unsigned i;
int ret;
char s[64];
/* create the LPM table */
config_ipv4.max_rules = IPV4_L3FWD_LPM_MAX_RULES;
config_ipv4.number_tbl8s = IPV4_L3FWD_LPM_NUMBER_TBL8S;
config_ipv4.flags = 0;
snprintf(s, sizeof(s), "IPV4_L3FWD_LPM_%d", socketid);
ipv4_l3fwd_lpm_lookup_struct[socketid] =
rte_lpm_create(s, socketid, &config_ipv4);
if (ipv4_l3fwd_lpm_lookup_struct[socketid] == NULL)
rte_exit(EXIT_FAILURE,
"Unable to create the l3fwd LPM table on socket %d\n",
socketid);
/* populate the LPM table */
for (i = 0; i < IPV4_L3FWD_LPM_NUM_ROUTES; i++) {
/* skip unused ports */
if ((1 << ipv4_l3fwd_lpm_route_array[i].if_out &
enabled_port_mask) == 0)
continue;
ret = rte_lpm_add(ipv4_l3fwd_lpm_lookup_struct[socketid],
ipv4_l3fwd_lpm_route_array[i].ip,
ipv4_l3fwd_lpm_route_array[i].depth,
ipv4_l3fwd_lpm_route_array[i].if_out);
if (ret < 0) {
rte_exit(EXIT_FAILURE,
"Unable to add entry %u to the l3fwd LPM table on socket %d\n",
i, socketid);
}
printf("LPM: Adding route 0x%08x / %d (%d)\n",
(unsigned)ipv4_l3fwd_lpm_route_array[i].ip,
ipv4_l3fwd_lpm_route_array[i].depth,
ipv4_l3fwd_lpm_route_array[i].if_out);
}
/* create the LPM6 table */
snprintf(s, sizeof(s), "IPV6_L3FWD_LPM_%d", socketid);
config.max_rules = IPV6_L3FWD_LPM_MAX_RULES;
config.number_tbl8s = IPV6_L3FWD_LPM_NUMBER_TBL8S;
config.flags = 0;
ipv6_l3fwd_lpm_lookup_struct[socketid] = rte_lpm6_create(s, socketid,
&config);
if (ipv6_l3fwd_lpm_lookup_struct[socketid] == NULL)
rte_exit(EXIT_FAILURE,
"Unable to create the l3fwd LPM table on socket %d\n",
socketid);
/* populate the LPM table */
for (i = 0; i < IPV6_L3FWD_LPM_NUM_ROUTES; i++) {
/* skip unused ports */
if ((1 << ipv6_l3fwd_lpm_route_array[i].if_out &
enabled_port_mask) == 0)
continue;
ret = rte_lpm6_add(ipv6_l3fwd_lpm_lookup_struct[socketid],
ipv6_l3fwd_lpm_route_array[i].ip,
ipv6_l3fwd_lpm_route_array[i].depth,
ipv6_l3fwd_lpm_route_array[i].if_out);
if (ret < 0) {
rte_exit(EXIT_FAILURE,
"Unable to add entry %u to the l3fwd LPM table on socket %d\n",
i, socketid);
}
printf("LPM: Adding route %s / %d (%d)\n",
"IPV6",
ipv6_l3fwd_lpm_route_array[i].depth,
ipv6_l3fwd_lpm_route_array[i].if_out);
}
}
/*
 * Check whether a port reports both IPv4 and IPv6 L3 packet types.
 *
 * The supported-ptype query is issued twice: first with no buffer to learn
 * how many entries exist, then again to fetch them. A message is printed for
 * each of the two types that is missing.
 *
 * Returns 1 when both types are reported, 0 otherwise (including when the
 * first query returns no entries or an error).
 */
int
lpm_check_ptype(int portid)
{
	const uint32_t l3_mask = RTE_PTYPE_L3_MASK;
	int has_ipv4 = 0;
	int has_ipv6 = 0;
	int count;
	int idx;

	count = rte_eth_dev_get_supported_ptypes(portid, l3_mask, NULL, 0);
	if (count <= 0)
		return 0;

	uint32_t ptypes[count];

	count = rte_eth_dev_get_supported_ptypes(portid, l3_mask, ptypes, count);
	for (idx = 0; idx < count; idx++) {
		if (ptypes[idx] & RTE_PTYPE_L3_IPV4)
			has_ipv4 = 1;
		if (ptypes[idx] & RTE_PTYPE_L3_IPV6)
			has_ipv6 = 1;
	}

	if (!has_ipv4)
		printf("port %d cannot parse RTE_PTYPE_L3_IPV4\n", portid);
	if (!has_ipv6)
		printf("port %d cannot parse RTE_PTYPE_L3_IPV6\n", portid);

	return (has_ipv4 && has_ipv6) ? 1 : 0;
}
/*
 * Classify a packet in software from its Ethernet type and store the result
 * in m->packet_type: an IPv4 or IPv6 "ext unknown" L3 type, or
 * RTE_PTYPE_UNKNOWN for anything else.
 */
static inline void
lpm_parse_ptype(struct rte_mbuf *m)
{
	struct ether_hdr *eth = rte_pktmbuf_mtod(m, struct ether_hdr *);
	const uint16_t etype = eth->ether_type;
	uint32_t ptype;

	/* ether_type is in network byte order, so convert the constants. */
	if (etype == rte_cpu_to_be_16(ETHER_TYPE_IPv4)) {
		ptype = RTE_PTYPE_UNKNOWN | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN;
	} else if (etype == rte_cpu_to_be_16(ETHER_TYPE_IPv6)) {
		ptype = RTE_PTYPE_UNKNOWN | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN;
	} else {
		ptype = RTE_PTYPE_UNKNOWN;
	}

	m->packet_type = ptype;
}
/*
 * Run lpm_parse_ptype() on each of the nb_pkts mbufs in pkts.
 * port, queue, max_pkts and user_param are unused.
 * Returns nb_pkts unchanged.
 */
uint16_t
lpm_cb_parse_ptype(uint8_t port __rte_unused, uint16_t queue __rte_unused,
		struct rte_mbuf *pkts[], uint16_t nb_pkts,
		uint16_t max_pkts __rte_unused,
		void *user_param __rte_unused)
{
	uint16_t idx;

	for (idx = 0; idx < nb_pkts; idx++) {
		lpm_parse_ptype(pkts[idx]);
	}

	return nb_pkts;
}
/* Return the IPv4 LPM table of the given socket as an opaque pointer. */
void *
lpm_get_ipv4_l3fwd_lookup_struct(const int socketid)
{
	struct rte_lpm *table = ipv4_l3fwd_lpm_lookup_struct[socketid];

	return table;
}
/* Return the IPv6 LPM6 table of the given socket as an opaque pointer. */
void *
lpm_get_ipv6_l3fwd_lookup_struct(const int socketid)
{
	struct rte_lpm6 *table = ipv6_l3fwd_lpm_lookup_struct[socketid];

	return table;
}
- 运行
./l3fwd -c1 -n4 -- -pf0 -L --config="(4,0,0),(5,0,0)" --parse-ptype
PMD: eth_em_dev_init(): port_id 7 vendorID=0x8086 deviceID=0x150c
L3FWD: Longest-prefix match selected
soft parse-ptype is enabled
RTE_MAX_LCORE = 128
lcore_conf[0]: rx_q_idx = 0, queue_id = 0, rx_port_id=4
lcore_conf[0]: rx_q_idx = 1, queue_id = 0, rx_port_id=5
Skipping disabled port 0
Skipping disabled port 1
Skipping disabled port 2
Skipping disabled port 3
Initializing port 4 ... Creating queues: nb_rxq=1 nb_txq=1... Address:00:E0:4C:0B:95:DE, Destination:02:00:00:00:00:04, Allocated mbuf pool on socket 0
LPM: Adding route 0x05010100 / 24 (4)
LPM: Adding route 0x06010100 / 24 (5)
LPM: Adding route 0x07010100 / 24 (6)
LPM: Adding route 0x08010100 / 24 (7)
LPM: Adding route IPV6 / 48 (4)
LPM: Adding route IPV6 / 48 (5)
LPM: Adding route IPV6 / 48 (6)
LPM: Adding route IPV6 / 48 (7)
txq=0,0,0 PMD: eth_em_tx_queue_setup(): sw_ring=0x7fffbab24300 hw_ring=0x7fffbab26400 dma_addr=0x13f526400
Initializing port 5 ... Creating queues: nb_rxq=1 nb_txq=1... Address:00:E0:4C:0B:95:DF, Destination:02:00:00:00:00:05, txq=0,0,0 PMD: eth_em_tx_queue_setup(): sw_ring=0x7fffbab120c0 hw_ring=0x7fffbab141c0 dma_addr=0x13f5141c0
Initializing port 6 ... Creating queues: nb_rxq=0 nb_txq=1... Address:00:E0:4C:0B:95:E0, Destination:02:00:00:00:00:06, txq=0,0,0 PMD: eth_em_tx_queue_setup(): sw_ring=0x7fffbaafff00 hw_ring=0x7fffbab02000 dma_addr=0x13f502000
Initializing port 7 ... Creating queues: nb_rxq=0 nb_txq=1... Address:00:E0:4C:0B:95:E1, Destination:02:00:00:00:00:07, txq=0,0,0 PMD: eth_em_tx_queue_setup(): sw_ring=0x7fffbaaedd40 hw_ring=0x7fffbaaefe40 dma_addr=0x13f4efe40
Initializing rx queues on lcore 0 ... rxq=4,0,0 PMD: eth_em_rx_queue_setup(): sw_ring=0x7fffbaadd800 hw_ring=0x7fffbaaddd00 dma_addr=0x13f4ddd00
rxq=5,0,0 PMD: eth_em_rx_queue_setup(): sw_ring=0x7fffbaacd2c0 hw_ring=0x7fffbaacd7c0 dma_addr=0x13f4cd7c0
PMD: eth_em_start(): <<
PMD: eth_em_start(): <<
PMD: eth_em_start(): <<
PMD: eth_em_start(): <<
Port 4: softly parse packet type info
Port 5: softly parse packet type info
Checking link status..........................................................................................done
Port 4 Link Up - speed 1000 Mbps - full-duplex
Port 5 Link Up - speed 1000 Mbps - full-duplex
Port 6 Link Down
Port 7 Link Down
L3FWD: entering main loop on lcore 0
L3FWD: -- lcoreid=0 portid=4 rxqueueid=0
L3FWD: -- lcoreid=0 portid=5 rxqueueid=0
RTE_MAX_ETHPORTS = 32, lcore_id:0, n_tx_port = 4
- 转发分析
- l3fwd可以进行多核转发,每个逻辑核可以在不同的接口的不同队列进行收包,每个逻辑核也会在每个接口的某个队列进行发包
- l3fwd需要修改l3fwd_lpm.c中的ipv4_l3fwd_lpm_route_array来设置路由表
/* Each entry is {prefix, prefix length, output port}. */
static struct ipv4_l3fwd_lpm_route ipv4_l3fwd_lpm_route_array[] = {
{IPv4(88, 88, 88, 0), 24, 0}, // route added by the author
{IPv4(99, 99, 99, 0), 24, 1}, // route added by the author
{IPv4(1, 1, 1, 0), 24, 0},
{IPv4(2, 1, 1, 0), 24, 1},
{IPv4(3, 1, 1, 0), 24, 2},
{IPv4(4, 1, 1, 0), 24, 3},
{IPv4(5, 1, 1, 0), 24, 4},
{IPv4(6, 1, 1, 0), 24, 5},
{IPv4(7, 1, 1, 0), 24, 6},
{IPv4(8, 1, 1, 0), 24, 7},
};
- l3fwd需要设置下一跳网关的mac地址
// Set the destination MAC of each output port explicitly
// port 0: 00:e0:4c:0b:01:f4
// port 1: 00:e0:4c:0a:fe:70
// The six MAC bytes are packed with the first byte in the lowest 8 bits.
// NOTE(review): (x << 24) is evaluated as int; a byte >= 0x80 in that position
// would overflow -- cast to uint64_t before reusing this with other addresses.
dest_eth_addr[0] = (0xf40000000000 /*<< 40*/) + (0x0100000000/* << 32*/) + (0x0b << 24) + (0x4c << 16) + (0xe0 << 8) + (0x00 << 0);
dest_eth_addr[1] = (0x700000000000/* << 40*/) + (0xfe00000000/* << 32*/) + (0x0a << 24) + (0x4c << 16) + (0xe0 << 8) + (0x00 << 0);
// Mirror the same 64-bit value into the start of val_eth[] for each port.
*(uint64_t *)(val_eth + 0) = dest_eth_addr[0];
*(uint64_t *)(val_eth + 1) = dest_eth_addr[1];
- 路由测试
- 拓扑:pc(88.88.88.88)---------l3fwd---------linux(99.99.99.99)
./l3fwd -c3 -n4 -- -p3 -L --config="(0,0,0),(1,0,1)"