NV_NVDLA_cvif.cpp
cdp2cvif_wr_req_b_transport
// TLM b_transport target for write requests from CDP (Channel Data Processor)
// to the CVIF. A DMA write arrives as one command packet (tag == TAG_CMD)
// followed by one or more data packets. Command packets are turned into a
// client_cvif_wr_req_t descriptor and pushed to cdp2cvif_wr_cmd_fifo_; data
// packets are pushed to cdp2cvif_wr_data_fifo_ in 32-byte
// (DMA_TRANSACTION_ATOM_SIZE) atoms. Both FIFOs are drained by
// WriteRequest_cdp2cvif(), which also frees the descriptor and the atoms.
//
// @param ID      target socket id (unused here)
// @param payload incoming DMA write request; pd.dma_write_cmd is valid for
//                command packets, pd.dma_write_data for data packets
// @param delay   TLM timing annotation (unused here)
void NV_NVDLA_cvif::cdp2cvif_wr_req_b_transport(int ID, nvdla_dma_wr_req_t* payload, sc_time& delay) {
    uint32_t packet_id;
    uint8_t *dma_payload_data_ptr;
    uint8_t *data_ptr;
    uint32_t rest_size, incoming_size;
    client_cvif_wr_req_t * cdp_wr_req;
    packet_id = payload->tag;
    if (TAG_CMD == packet_id) {
        cdp_wr_req_count_ ++;
#pragma CTC SKIP
        // Protocol check: a new command is illegal while the previous command
        // still has outstanding data packets.
        if (true == has_cdp_onging_wr_req_) {
            FAIL(("NV_NVDLA_cvif::cdp2cvif_wr_req_b_transport, got two consective command request, one command request shall be followed by one or more data request."));
        }
#pragma CTC ENDSKIP
        else {
            has_cdp_onging_wr_req_ = true;
        }
        // Descriptor ownership passes to WriteRequest_cdp2cvif() via the FIFO.
        cdp_wr_req = new client_cvif_wr_req_t;
        cdp_wr_req->addr = payload->pd.dma_write_cmd.addr;
        // dma_write_cmd.size is encoded as (number of 32B atoms - 1).
        cdp_wr_req->size = (payload->pd.dma_write_cmd.size + 1) * DMA_TRANSACTION_ATOM_SIZE; //In byte
        cdp_wr_req->require_ack = payload->pd.dma_write_cmd.require_ack;
        cslDebug((50, "before write to cdp2cvif_wr_cmd_fifo_\x0A"));
        cdp2cvif_wr_cmd_fifo_->write(cdp_wr_req);
        cslDebug((50, "after write to cdp2cvif_wr_cmd_fifo_\x0A"));
        // Track how much data is still expected for this command.
        cdp_wr_req_got_size_ = 0;
        cdp_wr_req_size_ = cdp_wr_req->size;
    } else {
        // Data packet: carries one 32B atom, or two when at least 64B remain.
        dma_payload_data_ptr = reinterpret_cast <uint8_t *> (payload->pd.dma_write_data.data);
        rest_size = cdp_wr_req_size_ - cdp_wr_req_got_size_;
        incoming_size = min(rest_size, uint32_t (DMA_TRANSACTION_MAX_SIZE));
        // First 32B atom; buffer is freed by the consumer (delete[]).
        data_ptr = new uint8_t[DMA_TRANSACTION_ATOM_SIZE];
        memcpy(data_ptr, dma_payload_data_ptr, DMA_TRANSACTION_ATOM_SIZE);
        cslDebug((50, "before write to cdp2cvif_wr_data_fifo_\x0A"));
        cdp2cvif_wr_data_fifo_->write(data_ptr); // Write to FIFO in 32Byte atom
        cslDebug((50, "after write to cdp2cvif_wr_data_fifo_\x0A"));
        cdp_wr_req_got_size_ += incoming_size;
        for(int i = 0; i < DMA_TRANSACTION_ATOM_SIZE; i++) {
            cslDebug((50, "%x ", data_ptr[i]));
        }
        cslDebug((50, "\x0A"));
        if (incoming_size==DMA_TRANSACTION_MAX_SIZE) {
            // The payload is 64B, so push the second 32B atom as well.
            data_ptr = new uint8_t[DMA_TRANSACTION_ATOM_SIZE];
            memcpy(data_ptr, &dma_payload_data_ptr[DMA_TRANSACTION_ATOM_SIZE], DMA_TRANSACTION_ATOM_SIZE);
            cslDebug((50, "write to cdp2cvif_wr_data_fifo_\x0A"));
            cdp2cvif_wr_data_fifo_->write(data_ptr);
            for(int i = 0; i < DMA_TRANSACTION_ATOM_SIZE; i++) {
                cslDebug((50, "%x ", data_ptr[i]));
            }
            cslDebug((50, "\x0A"));
        }
        // All data received for the current command: allow the next command.
        if (cdp_wr_req_got_size_ == cdp_wr_req_size_) {
            has_cdp_onging_wr_req_ = false;
        }
    }
}
函数cdp2cvif_wr_req_b_transport用于处理来自跨通道数据处理器(CDP)到卷积接口(CVIF)的写请求传输。主要功能如下:
- 解析传入参数,包括请求的tag,通常用于标识请求类型。
- 如果请求类型为命令请求(TAG_CMD),则执行以下操作:
- 检查是否存在连续的命令请求,如果存在则抛出错误。
- 创建新的cdp_wr_req对象,包括地址、大小和是否需要确认等信息。
- 将cdp_wr_req对象写入cdp2cvif_wr_cmd_fifo_,以保存写请求相关信息。
- 重置已接收数据大小(cdp_wr_req_got_size_)和请求数据大小(cdp_wr_req_size_)。
- 对于数据请求,执行以下操作:
- 从payload中获取数据指针dma_payload_data_ptr。
- 计算尚未接收的数据大小(rest_size)和本次实际接收的数据大小(incoming_size)。
- 创建新的uint8_t类型的data_ptr,并从dma_payload_data_ptr中复制数据到data_ptr中。
- 将data_ptr写入cdp2cvif_wr_data_fifo_,以32字节为单位。
- 更新cdp_wr_req_got_size_以跟踪已接收的数据大小。
- 如果incoming_size等于DMA_TRANSACTION_MAX_SIZE(64字节),说明本次payload携带两个32字节原子,需将第二个原子也复制并写入cdp2cvif_wr_data_fifo_。
- 如果已接收的数据大小等于请求的数据大小(cdp_wr_req_size_),则表示请求完成,将has_cdp_onging_wr_req_设置为false。
这个函数的主要目的是管理来自CDP到卷积接口(CVIF)之间的写请求传输,包括命令和数据请求,并将相关信息存储到相应的FIFO队列中。此外,它还维护了请求的连续性,以确保数据的正确传输。
WriteRequest_cdp2cvif
// SC_THREAD body: drains write commands from cdp2cvif_wr_cmd_fifo_, splits
// each DMA write request into one or more AXI transactions (64-byte atoms,
// bounded by CVIF_MAX_MEM_TRANSACTION_SIZE), fills the transaction data from
// cdp2cvif_wr_data_fifo_, and forwards the payloads to cdp_wr_req_fifo_ for
// arbitration. For every AXI transaction one entry is pushed to
// cdp_wr_required_ack_fifo_: true only for the last split of a command whose
// require_ack flag is set, false otherwise. Loops forever.
void NV_NVDLA_cvif::WriteRequest_cdp2cvif() {
    uint64_t base_addr;            // base address of the current AXI transaction
    uint64_t first_base_addr;      // 64B-aligned base address of the first transaction
    uint64_t last_base_addr;       // base address of the last 64B atom of the request
    uint64_t cur_address;          // cursor walking the request in 64B steps
    uint32_t size_in_byte;         // byte size of the current AXI transaction
    uint32_t total_axi_size;       // payload size padded to 64B at head and tail
    uint64_t payload_addr;         // DMA request address (32B aligned)
    uint32_t payload_size;         // DMA request size in bytes
    uint8_t* axi_byte_enable_ptr;
    uint32_t byte_iter;
    uint32_t atom_iter;
    uint32_t atom_num;             // number of 32B atoms in the current transaction
    bool is_base_64byte_align;
    bool is_rear_64byte_align;
    bool is_read=false;            // this thread only issues write transactions
    uint8_t *axi_atom_ptr;
    nvdla_dbb_extension *nvdla_dbb_ext = NULL;
    client_cvif_wr_req_t * cdp_wr_req;
    dla_b_transport_payload *bt_payload;
    while(true) {
        // Read one write command (blocking); the descriptor was allocated by
        // cdp2cvif_wr_req_b_transport() and is freed at the end of this loop.
        cdp_wr_req = cdp2cvif_wr_cmd_fifo_->read();
        payload_addr = cdp_wr_req->addr; // It's aligned to 32B, not necessarily 64B
        payload_size = cdp_wr_req->size;
        cslDebug((50, "NV_NVDLA_cvif::WriteRequest_cdp2cvif, got one write command from cdp2cvif_wr_cmd_fifo_\x0A"));
        cslDebug((50, " payload_addr: 0x%lx\x0A", payload_addr));
        cslDebug((50, " payload_size: 0x%x\x0A", payload_size));
        is_base_64byte_align = payload_addr%AXI_TRANSACTION_ATOM_SIZE == 0;
        first_base_addr = is_base_64byte_align? payload_addr: payload_addr - DMA_TRANSACTION_ATOM_SIZE; // Align to 64B
        is_rear_64byte_align = (payload_addr + payload_size) % AXI_TRANSACTION_ATOM_SIZE == 0;
        // According to the DBB_PV standard, data_length shall be equal or greater
        // than DBB_PV m_size * m_length whether or not the transaction is aligned.
        total_axi_size = payload_size + (is_base_64byte_align? 0: DMA_TRANSACTION_ATOM_SIZE) + (is_rear_64byte_align? 0: DMA_TRANSACTION_ATOM_SIZE);
        last_base_addr = first_base_addr + total_axi_size - AXI_TRANSACTION_ATOM_SIZE;
        // if ( total_axi_size <= AXI_TRANSACTION_ATOM_SIZE ) {
        //     // The first and last transaction is actually the same
        //     last_base_addr = first_base_addr;
        // } else {
        //     last_base_addr = (first_base_addr + total_axi_size) - (first_base_addr + total_axi_size)%AXI_TRANSACTION_ATOM_SIZE;
        // }
        // if (total_axi_size + first_base_addr%CVIF_MAX_MEM_TRANSACTION_SIZE <= CVIF_MAX_MEM_TRANSACTION_SIZE) {
        //     // Base and last are in the same AXI transaction
        // } else {
        //     // Base and last are in different AXI transaction
        //     last_base_addr =
        // }
        // } else if ((first_base_addr + total_axi_size)%CVIF_MAX_MEM_TRANSACTION_SIZE != 0) {
        //     if (total_axi_size >= (first_base_addr + total_axi_size)%CVIF_MAX_MEM_TRANSACTION_SIZE) {
        //         last_base_addr = first_base_addr + total_axi_size - (first_base_addr + total_axi_size)%CVIF_MAX_MEM_TRANSACTION_SIZE;
        //     } else {
        //         last_base_addr = first_base_addr;
        //     }
        // } else {
        //     if (total_axi_size >= CVIF_MAX_MEM_TRANSACTION_SIZE) {
        //         last_base_addr = first_base_addr + total_axi_size - CVIF_MAX_MEM_TRANSACTION_SIZE;
        //     } else {
        //         last_base_addr = first_base_addr;
        //     }
        // }
        cslDebug((50, "NV_NVDLA_cvif::WriteRequest_cdp2cvif:\x0A"));
        cslDebug((50, " first_base_addr: 0x%lx\x0A", first_base_addr));
        cslDebug((50, " last_base_addr: 0x%lx\x0A", last_base_addr));
        cslDebug((50, " total_axi_size: 0x%x\x0A", total_axi_size));
        // cur_address = payload_addr;
        cur_address = is_base_64byte_align? payload_addr: first_base_addr; // Align to 64B
        //Split dma request to axi requests
        // while(cur_address < payload_addr + payload_size) {}
        while(cur_address <= last_base_addr) {
            base_addr = cur_address;
            size_in_byte = AXI_TRANSACTION_ATOM_SIZE;
            // Check whether the next atom belongs to the current AXI transaction:
            // grow until the request ends or a CVIF_MAX_MEM_TRANSACTION_SIZE
            // boundary is reached.
            // while (((cur_address + DMA_TRANSACTION_ATOM_SIZE) < (payload_addr + payload_size)) && ((cur_address + DMA_TRANSACTION_ATOM_SIZE) % CVIF_MAX_MEM_TRANSACTION_SIZE != 0)) {
            //     size_in_byte += DMA_TRANSACTION_ATOM_SIZE;
            //     cur_address += DMA_TRANSACTION_ATOM_SIZE;
            // }
            while (((cur_address + AXI_TRANSACTION_ATOM_SIZE) < (first_base_addr + total_axi_size)) && ((cur_address + AXI_TRANSACTION_ATOM_SIZE) % CVIF_MAX_MEM_TRANSACTION_SIZE != 0)) {
                size_in_byte += AXI_TRANSACTION_ATOM_SIZE;
                cur_address += AXI_TRANSACTION_ATOM_SIZE;
            }
            // start address of next axi transaction
            cur_address += AXI_TRANSACTION_ATOM_SIZE;
            atom_num = size_in_byte / DMA_TRANSACTION_ATOM_SIZE;
            bt_payload = new dla_b_transport_payload(size_in_byte, dla_b_transport_payload::DLA_B_TRANSPORT_PAYLOAD_TYPE_MC);
            axi_byte_enable_ptr = bt_payload->gp.get_byte_enable_ptr();
            cslDebug((50, "NV_NVDLA_cvif::WriteRequest_cdp2cvif, base_addr=0x%lx size_in_byte=0x%x atom_num=0x%x\x0A", base_addr, size_in_byte, atom_num));
            // Byte-enable mask: padding atoms added for alignment are disabled,
            // all payload bytes are enabled.
            for (byte_iter=0; byte_iter < size_in_byte; byte_iter++) {
                if ( (base_addr == first_base_addr) && (false == is_base_64byte_align) && (byte_iter < DMA_TRANSACTION_ATOM_SIZE)) {
                    // Disable the leading padding atom of the unaligned first_base_addr
                    axi_byte_enable_ptr[byte_iter] = TLM_BYTE_DISABLED;
                } else if (( (base_addr + size_in_byte) == (last_base_addr+AXI_TRANSACTION_ATOM_SIZE)) && (false == is_rear_64byte_align) && (byte_iter >= size_in_byte - DMA_TRANSACTION_ATOM_SIZE)) {
                    // Disable the trailing padding atom of the unaligned last_base_addr
                    axi_byte_enable_ptr[byte_iter] = TLM_BYTE_DISABLED;
                } else {
                    axi_byte_enable_ptr[byte_iter] = TLM_BYTE_ENABLED;
                }
            }
            cslDebug((50, "NV_NVDLA_cvif::WriteRequest_cdp2cvif, TLM_BYTE_ENABLE is done\x0A"));
            // Fill the transaction data atom by atom; padding atoms are zeroed
            // and consume no FIFO data.
            for (atom_iter=0; atom_iter < atom_num; atom_iter++) {
                if ( (base_addr == first_base_addr) && (false == is_base_64byte_align) && (0 == atom_iter)) {
                    // Leading padding atom of the unaligned first_base_addr
                    // (the unaligned address itself is used later, per DBB_PV).
                    memset(&bt_payload->data[atom_iter*DMA_TRANSACTION_ATOM_SIZE], 0, DMA_TRANSACTION_ATOM_SIZE);
                } else if (((base_addr + size_in_byte) == (last_base_addr+AXI_TRANSACTION_ATOM_SIZE)) && (false == is_rear_64byte_align) && ( (atom_iter + 1) == atom_num)) {
                    // Trailing padding atom of the unaligned last_base_addr
                    memset(&bt_payload->data[atom_iter*DMA_TRANSACTION_ATOM_SIZE], 0, DMA_TRANSACTION_ATOM_SIZE);
                } else {
                    cslDebug((50, "NV_NVDLA_cvif::WriteRequest_cdp2cvif, before read an atom from cdp2cvif_wr_data_fifo_, base_addr = 0x%lx, atom_iter=0x%x\x0A", base_addr, atom_iter));
                    axi_atom_ptr = cdp2cvif_wr_data_fifo_->read();
                    for(int i=0; i<DMA_TRANSACTION_ATOM_SIZE; i++) {
                        cslDebug((50, "%02x ", axi_atom_ptr[i]));
                    }
                    cslDebug((50, "\x0A"));
                    cslDebug((50, "NV_NVDLA_cvif::WriteRequest_cdp2cvif, after read an atom from cdp2cvif_wr_data_fifo_\x0A"));
                    memcpy(&bt_payload->data[atom_iter*DMA_TRANSACTION_ATOM_SIZE], axi_atom_ptr, DMA_TRANSACTION_ATOM_SIZE);
                    delete[] axi_atom_ptr;
                }
            }
            if ( (base_addr == first_base_addr) && (false == is_base_64byte_align) ) {
                // Issue the first transaction at the original unaligned (32B)
                // address, as required by DBB_PV; its padding atom was masked above.
                base_addr += DMA_TRANSACTION_ATOM_SIZE;
            }
            cslDebug((50, "NV_NVDLA_cvif::WriteRequest_cdp2cvif, base_address=0x%lx size in byte=0x%x\x0A", base_addr, size_in_byte));
            // Prepare write payload
            bt_payload->configure_gp(base_addr, size_in_byte, is_read);
            bt_payload->gp.get_extension(nvdla_dbb_ext);
            cslDebug((50, "NV_NVDLA_cvif::WriteRequest_cdp2cvif, sending write command to cdp_wr_req_fifo_.\x0A"));
            cslDebug((50, " addr: 0x%016lx\x0A", base_addr));
            cslDebug((50, " size: %d\x0A", size_in_byte));
            nvdla_dbb_ext->set_id(CDP_AXI_ID);
            nvdla_dbb_ext->set_size(64);
            nvdla_dbb_ext->set_length(size_in_byte/AXI_TRANSACTION_ATOM_SIZE);
            // if (base_addr%AXI_TRANSACTION_ATOM_SIZE != 0) //Set length(in unit of 64B) to be same as RTL
            //     nvdla_dbb_ext->set_length(((size_in_byte - DMA_TRANSACTION_ATOM_SIZE) + DMA_TRANSACTION_ATOM_SIZE)/AXI_TRANSACTION_ATOM_SIZE);
            // else // base_addr is aligned to 64Bytes
            //     nvdla_dbb_ext->set_length((size_in_byte + DMA_TRANSACTION_ATOM_SIZE)/AXI_TRANSACTION_ATOM_SIZE-1);
            // write payload to arbiter fifo
            cdp_wr_req_fifo_->write(bt_payload);
            // When the last split req is sent to ext, write true to cdp_wr_required_ack_fifo_ when ack is required.
            if (cur_address >= (payload_addr + payload_size)) {
                if(cdp_wr_req->require_ack!=0) {
                    cslDebug((50, "NV_NVDLA_cvif::WriteRequest_cdp2cvif, require ack.\x0A"));
                    cdp_wr_required_ack_fifo_->write(true);
                }
                else {
                    cslDebug((50, "NV_NVDLA_cvif::WriteRequest_cdp2cvif, does not require ack.\x0A"));
                    cdp_wr_required_ack_fifo_->write(false);
                }
            }
            else {
                cslDebug((50, "NV_NVDLA_cvif::WriteRequest_cdp2cvif, does not require ack.\x0A"));
                cdp_wr_required_ack_fifo_->write(false);
            }
        }
        delete cdp_wr_req;
        cslDebug((50, "NV_NVDLA_cvif::WriteRequest_cdp2cvif, write command processing done\x0A"));
    }
}
这段代码是NV_NVDLA_cvif类中的WriteRequest_cdp2cvif函数,它处理来自跨通道数据处理器(CDP)到卷积接口(CVIF)的写请求。主要功能如下:
- 从cdp2cvif_wr_cmd_fifo_中读取一条写请求命令,解析请求的地址(payload_addr)和大小(payload_size)。
- 判断请求的首地址和尾地址是否以64字节对齐(is_base_64byte_align、is_rear_64byte_align),并据此计算首个和最后一个AXI事务的基地址(first_base_addr和last_base_addr)。
- 根据请求大小和最大事务长度(CVIF_MAX_MEM_TRANSACTION_SIZE),将DMA请求拆分成一个或多个AXI事务。
- 为每个AXI事务创建dla_b_transport_payload对象,并配置字节使能位(axi_byte_enable_ptr),以屏蔽因对齐而补充的无效字节。
- 从cdp2cvif_wr_data_fifo_中读取32字节数据原子并复制到AXI事务的数据区中。
- 将AXI事务写入cdp_wr_req_fifo_,并设置地址、大小和AXI ID等属性。
- 对每个拆分出的事务都向cdp_wr_required_ack_fifo_写入一项:仅当它是该请求的最后一个事务且请求要求确认(require_ack)时写入true,否则写入false。
- 删除已处理完的写请求对象,然后循环处理下一个写请求。
这个函数的主要目的是将CDP到CVIF之间的写请求拆分成多个AXI事务并送入仲裁FIFO。它还处理首尾不对齐的情况,并在需要时发送写确认。
rbk2cvif_wr_req_b_transport
void NV_NVDLA_cvif::rbk2cvif_wr_req_b_transport(int ID, nvdla_dma_wr_req_t* payload, sc_time& delay) {
uint32_t packet_id;
uint8_t *dma_payload_data_ptr;
uint8_t *data_ptr;
uint32_t rest_size, incoming_size;
client_cvif_wr_req_t * rbk_wr_req;
packet_id = payload->tag;
if (TAG_CMD == packet_id) {
rbk_wr_req_count_ ++;
#pragma CTC SKIP
if (true == has_rbk_onging_wr_req_) {
FAIL(("NV_NVDLA_cvif::rbk2cvif_wr_req_b_transport, got two consective command request, one command request shall be followed by one or more data request."));
}
#pragma CTC ENDSKIP
else {
has_rbk_onging_wr_req_ = true;
}
rbk_wr_req = new client_cvif_wr_req_t;
rbk_wr_req->addr = payload->pd.dma_write_cmd.addr;
rbk_wr_req->size = (payload->pd.dma_write_cmd.size + 1) * DMA_TRANSACTION_ATOM_SIZE; //In byte
rbk_wr_req->require_ack = payload->pd.dma_write_cmd.require_ack;
cslDebug((50, "before write to rbk2cvif_wr_cmd_fifo_\x0A"));
rbk2cvif_wr_cmd_fifo_->write(rbk_wr_req);
cslDebug((50, "after write to rbk2cvif_wr_cmd_fifo_\x0A"));
rbk_wr_req_got_size_ = 0;
rbk_wr_req_size_ = rbk_wr_req->size;
这段代码是一个用于数据传输的函数,用于处理来自RBK(Reconfigurable Block)到CVIF(Cache Virtual Interface)的写请求传输。以下是该函数的主要功能:
-
解析输入参数:
- 通过payload参数获取请求的tag,通常用于标识请求类型。
-
对于命令请求(TAG_CMD):
- 如果已经存在连续的命令请求,抛出错误。
- 否则,创建一个新的rbk_wr_req对象,并初始化它的地址、大小和是否需要确认。
- 将rbk_wr_req对象写入rbk2cvif_wr_cmd_fifo_中,该对象包含了写请求的相关信息。
- 重置rbk_wr_req_got_size_和rbk_wr_req_size_。
这个函数的主要作用是处理RBK到CVIF之间的写请求传输,包括命令请求,以确保数据的正确传输,并维护请求的连续性。这段代码未提供完整的函数实现,但是可以看出它与之前提到的CDP到CVIF写请求处理函数具有相似的结构和目的。
} else {
dma_payload_data_ptr = reinterpret_cast <uint8_t *> (payload->pd.dma_write_data.data);
rest_size = rbk_wr_req_size_ - rbk_wr_req_got_size_;
incoming_size = min(rest_size, uint32_t (DMA_TRANSACTION_MAX_SIZE));
data_ptr = new uint8_t[DMA_TRANSACTION_ATOM_SIZE];
memcpy(data_ptr, dma_payload_data_ptr, DMA_TRANSACTION_ATOM_SIZE);
cslDebug((50, "before write to rbk2cvif_wr_data_fifo_\x0A"));
rbk2cvif_wr_data_fifo_->write(data_ptr); // Write to FIFO in 32Byte atom
cslDebug((50, "after write to rbk2cvif_wr_data_fifo_\x0A"));
rbk_wr_req_got_size_ += incoming_size;
for(int i = 0; i < DMA_TRANSACTION_ATOM_SIZE; i++) {
cslDebug((50, "%x ", data_ptr[i]));
}
cslDebug((50, "\x0A"));
if (incoming_size==DMA_TRANSACTION_MAX_SIZE) {
// The payload is 64B
data_ptr = new uint8_t[DMA_TRANSACTION_ATOM_SIZE];
memcpy(data_ptr, &dma_payload_data_ptr[DMA_TRANSACTION_ATOM_SIZE], DMA_TRANSACTION_ATOM_SIZE);
cslDebug((50, "write to rbk2cvif_wr_data_fifo_\x0A"));
rbk2cvif_wr_data_fifo_->write(data_ptr);
for(int i = 0; i < DMA_TRANSACTION_ATOM_SIZE; i++) {
cslDebug((50, "%x ", data_ptr[i]));
}
cslDebug((50, "\x0A"));
}
if (rbk_wr_req_got_size_ == rbk_wr_req_size_) {
has_rbk_onging_wr_req_ = false;
}
}
}
这段代码处理来自RBK(Rubik)到CVIF写请求传输中的数据请求(非命令请求)。主要步骤如下:
- 从payload参数中获取数据指针dma_payload_data_ptr,其中包含要写入的数据。
- 计算尚未接收的数据大小(rest_size),并取rest_size与DMA_TRANSACTION_MAX_SIZE中的较小值作为本次实际接收的数据大小(incoming_size)。
- 分配新的32字节缓冲区data_ptr,并从dma_payload_data_ptr复制第一个数据原子。
- 将data_ptr写入rbk2cvif_wr_data_fifo_,即以32字节原子为单位传递数据。
- 更新已接收数据大小(rbk_wr_req_got_size_)。
- 如果incoming_size等于DMA_TRANSACTION_MAX_SIZE(64字节),说明本次payload携带两个32字节原子,需再复制第二个原子并写入rbk2cvif_wr_data_fifo_。
- 如果已接收的数据大小等于请求的数据大小(rbk_wr_req_size_),则表示该请求的数据接收完成,将has_rbk_onging_wr_req_设置为false。
这个函数的主要目的是处理RBK到CVIF之间的数据写请求,把数据按32字节原子写入FIFO,以确保数据的正确传输并维护请求的连续性。它与前述CDP到CVIF的写请求处理函数结构相同。
WriteRequest_rbk2cvif
void NV_NVDLA_cvif::WriteRequest_rbk2cvif() {
uint64_t base_addr;
uint64_t first_base_addr;
uint64_t last_base_addr;
uint64_t cur_address;
uint32_t size_in_byte;
uint32_t total_axi_size;
uint64_t payload_addr;
uint32_t payload_size;
uint8_t* axi_byte_enable_ptr;
uint32_t byte_iter;
uint32_t atom_iter;
uint32_t atom_num;
bool is_base_64byte_align;
bool is_rear_64byte_align;
bool is_read=false;
uint8_t *axi_atom_ptr;
nvdla_dbb_extension *nvdla_dbb_ext = NULL;
client_cvif_wr_req_t * rbk_wr_req;
dla_b_transport_payload *bt_payload;
while(true) {
// Read one write command
rbk_wr_req = rbk2cvif_wr_cmd_fifo_->read();
payload_addr = rbk_wr_req->addr; // It's aligend to 32B, not 64B
payload_size = rbk_wr_req->size;
cslDebug((50, "NV_NVDLA_cvif::WriteRequest_rbk2cvif, got one write command from rbk2cvif_wr_cmd_fifo_\x0A"));
cslDebug((50, " payload_addr: 0x%lx\x0A", payload_addr));
cslDebug((50, " payload_size: 0x%x\x0A", payload_size));
is_base_64byte_align = payload_addr%AXI_TRANSACTION_ATOM_SIZE == 0;
first_base_addr = is_base_64byte_align? payload_addr: payload_addr - DMA_TRANSACTION_ATOM_SIZE; // Align to 64B
is_rear_64byte_align = (payload_addr + payload_size) % AXI_TRANSACTION_ATOM_SIZE == 0;
// According to DBB_PV standard, data_length shall be equal or greater than DBB_PV m_size * m_length no matter the transactions is aglined or not
total_axi_size = payload_size + (is_base_64byte_align? 0: DMA_TRANSACTION_ATOM_SIZE) + (is_rear_64byte_align? 0: DMA_TRANSACTION_ATOM_SIZE);
last_base_addr = first_base_addr + total_axi_size - AXI_TRANSACTION_ATOM_SIZE;
// if ( total_axi_size <= AXI_TRANSACTION_ATOM_SIZE ) {
// // The first and last transaction is actually the same
// last_base_addr = first_base_addr;
// } else {
// last_base_addr = (first_base_addr + total_axi_size) - (first_base_addr + total_axi_size)%AXI_TRANSACTION_ATOM_SIZE;
// }
// if (total_axi_size + first_base_addr%CVIF_MAX_MEM_TRANSACTION_SIZE <= CVIF_MAX_MEM_TRANSACTION_SIZE) {
// // Base and last are in the same AXI transaction
// } else {
// // Base and last are in different AXI transaction
// last_base_addr =
// }
// } else if ((first_base_addr + total_axi_size)%CVIF_MAX_MEM_TRANSACTION_SIZE != 0) {
// if (total_axi_size >= (first_base_addr + total_axi_size)%CVIF_MAX_MEM_TRANSACTION_SIZE) {
// last_base_addr = first_base_addr + total_axi_size - (first_base_addr + total_axi_size)%CVIF_MAX_MEM_TRANSACTION_SIZE;
// } else {
// last_base_addr = first_base_addr;
// }
// } else {
// if (total_axi_size >= CVIF_MAX_MEM_TRANSACTION_SIZE) {
// last_base_addr = first_base_addr + total_axi_size - CVIF_MAX_MEM_TRANSACTION_SIZE;
// } else {
// last_base_addr = first_base_addr;
// }
// }
cslDebug((50, "NV_NVDLA_cvif::WriteRequest_rbk2cvif:\x0A"));
cslDebug((50, " first_base_addr: 0x%lx\x0A", first_base_addr));
cslDebug((50, " last_base_addr: 0x%lx\x0A", last_base_addr));
cslDebug((50, " total_axi_size: 0x%x\x0A", total_axi_size));
这段代码是NV_NVDLA_cvif
类中的WriteRequest_rbk2cvif
函数,它用于处理来自RBK(Reconfigurable Block)到CVIF(Cache Virtual Interface)的写请求传输。以下是该函数的主要功能:
- 读取从
rbk2cvif_wr_cmd_fifo_
中接收到的写请求命令。 - 解析请求的地址(payload_addr)和大小(payload_size)。
- 确定请求是否以64字节对齐(is_base_64byte_align)以及请求的起始和结束地址(first_base_addr和last_base_addr)。
- 根据请求的大小,将请求分割成一个或多个AXI事务。
- 为每个AXI事务创建
dla_b_transport_payload
对象,配置字节启用位(axi_byte_enable_ptr)以处理不对齐的情况。 - 从
rbk2cvif_wr_data_fifo_
中读取数据并复制到AXI事务中。 - 将AXI事务的信息写入
cdp_wr_req_fifo_
,包括地址、大小和ID等。 - 如果请求需要确认(require_ack),则向
rbk_wr_required_ack_fifo_
中写入true,否则写入false。
这个函数的主要目的是将RBK到CVIF之间的写请求拆分成多个AXI事务,并将这些事务发送给卷积接口(CVIF)。它还处理不对齐的情况,并在需要时发送确认。这段代码的结构与之前提到的CDP到CVIF的写请求处理函数类似,用于RBK到CVIF的数据传输。
// cur_address = payload_addr;
cur_address = is_base_64byte_align? payload_addr: first_base_addr; // Align to 64B
//Split dma request to axi requests
// while(cur_address < payload_addr + payload_size) {}
while(cur_address <= last_base_addr) {
base_addr = cur_address;
size_in_byte = AXI_TRANSACTION_ATOM_SIZE;
// Check whether next ATOM belongs to current AXI transaction
// while (((cur_address + DMA_TRANSACTION_ATOM_SIZE) < (payload_addr + payload_size)) && ((cur_address + DMA_TRANSACTION_ATOM_SIZE) % CVIF_MAX_MEM_TRANSACTION_SIZE != 0)) {
// size_in_byte += DMA_TRANSACTION_ATOM_SIZE;
// cur_address += DMA_TRANSACTION_ATOM_SIZE;
// }
while (((cur_address + AXI_TRANSACTION_ATOM_SIZE) < (first_base_addr + total_axi_size)) && ((cur_address + AXI_TRANSACTION_ATOM_SIZE) % CVIF_MAX_MEM_TRANSACTION_SIZE != 0)) {
size_in_byte += AXI_TRANSACTION_ATOM_SIZE;
cur_address += AXI_TRANSACTION_ATOM_SIZE;
}
// start address of next axi transaction
cur_address += AXI_TRANSACTION_ATOM_SIZE;
atom_num = size_in_byte / DMA_TRANSACTION_ATOM_SIZE;
bt_payload = new dla_b_transport_payload(size_in_byte, dla_b_transport_payload::DLA_B_TRANSPORT_PAYLOAD_TYPE_MC);
axi_byte_enable_ptr = bt_payload->gp.get_byte_enable_ptr();
cslDebug((50, "NV_NVDLA_cvif::WriteRequest_rbk2cvif, base_addr=0x%lx size_in_byte=0x%x atom_num=0x%x\x0A", base_addr, size_in_byte, atom_num));
for (byte_iter=0; byte_iter < size_in_byte; byte_iter++) {
if ( (base_addr == first_base_addr) && (false == is_base_64byte_align) && (byte_iter < DMA_TRANSACTION_ATOM_SIZE)) {
// Diable 1st DMA atom of the unaligned first_base_addr
axi_byte_enable_ptr[byte_iter] = TLM_BYTE_DISABLED; // All bytes should be enabled
} else if (( (base_addr + size_in_byte) == (last_base_addr+AXI_TRANSACTION_ATOM_SIZE)) && (false == is_rear_64byte_align) && (byte_iter >= size_in_byte - DMA_TRANSACTION_ATOM_SIZE)) {
// Diable 2nd DMA atom of the unaligned last_base_addr
axi_byte_enable_ptr[byte_iter] = TLM_BYTE_DISABLED; // All bytes should be enabled
} else {
axi_byte_enable_ptr[byte_iter] = TLM_BYTE_ENABLED; // All bytes should be enabled
}
}
cslDebug((50, "NV_NVDLA_cvif::WriteRequest_rbk2cvif, TLM_BYTE_ENABLE is done\x0A"));
for (atom_iter=0; atom_iter < atom_num; atom_iter++) {
if ( (base_addr == first_base_addr) && (false == is_base_64byte_align) && (0 == atom_iter)) {
// Disable 1st DMA atom of the unaligned first_base_addr
// Use unaligned address as required by DBB_PV
memset(&bt_payload->data[atom_iter*DMA_TRANSACTION_ATOM_SIZE], 0, DMA_TRANSACTION_ATOM_SIZE);
} else if (((base_addr + size_in_byte) == (last_base_addr+AXI_TRANSACTION_ATOM_SIZE)) && (false == is_rear_64byte_align) && ( (atom_iter + 1) == atom_num)) {
// Disable 2nd DMA atom of the unaligned last_base_addr
memset(&bt_payload->data[atom_iter*DMA_TRANSACTION_ATOM_SIZE], 0, DMA_TRANSACTION_ATOM_SIZE);
} else {
cslDebug((50, "NV_NVDLA_cvif::WriteRequest_rbk2cvif, before read an atom from rbk2cvif_wr_data_fifo_, base_addr = 0x%lx, atom_iter=0x%x\x0A", base_addr, atom_iter));
axi_atom_ptr = rbk2cvif_wr_data_fifo_->read();
for(int i=0; i<DMA_TRANSACTION_ATOM_SIZE; i++) {
cslDebug((50, "%02x ", axi_atom_ptr[i]));
}
cslDebug((50, "\x0A"));
cslDebug((50, "NV_NVDLA_cvif::WriteRequest_rbk2cvif, after read an atom from rbk2cvif_wr_data_fifo_\x0A"));
memcpy(&bt_payload->data[atom_iter*DMA_TRANSACTION_ATOM_SIZE], axi_atom_ptr, DMA_TRANSACTION_ATOM_SIZE);
delete[] axi_atom_ptr;
}
}
这部分代码是WriteRequest_rbk2cvif
函数中的循环部分,用于拆分RBK到CVIF的写请求命令为多个AXI事务并生成相应的dla_b_transport_payload
,以便发送到CVIF接口。
具体操作如下:
-
首先,它从
rbk2cvif_wr_cmd_fifo_
中读取一个写请求命令,获取请求的起始地址(payload_addr
)和大小(payload_size
)。 -
然后,它确定请求是否以64字节对齐,并计算请求的起始地址(
first_base_addr
)和结束地址(last_base_addr
)。 -
接下来,它以64字节对齐的方式初始化当前地址(
cur_address
)。 -
循环处理,根据请求的大小和对齐情况,将请求分割成一个或多个AXI事务。它检查是否下一个数据原子(DMA_TRANSACTION_ATOM_SIZE)仍然属于当前AXI事务,如果是,则增加事务的大小(
size_in_byte
)。 -
对于每个AXI事务,它为事务创建一个
dla_b_transport_payload
对象,并配置字节启用位(axi_byte_enable_ptr
)以处理不对齐的情况。 -
根据是否处于不对齐的情况以及是否为第一个或最后一个数据原子,设置字节启用位。如果是不对齐的情况,它会禁用相应的字节。
-
对于每个数据原子,它从
rbk2cvif_wr_data_fifo_
中读取数据(axi_atom_ptr
)并将数据复制到AXI事务中。 -
最后,它将生成的AXI事务的信息写入
cdp_wr_req_fifo_
,包括地址、大小和其他相关信息。
这个循环部分的主要目的是将一个写请求拆分为多个AXI事务,以确保数据的正确传输和对齐,并生成相应的AXI事务数据。这些AXI事务将发送到CVIF接口进行处理。
if ( (base_addr == first_base_addr) && (false == is_base_64byte_align) ) {
base_addr += DMA_TRANSACTION_ATOM_SIZE;
}
cslDebug((50, "NV_NVDLA_cvif::WriteRequest_rbk2cvif, base_address=0x%lx size in byte=0x%x\x0A", base_addr, size_in_byte));
// Prepare write payload
bt_payload->configure_gp(base_addr, size_in_byte, is_read);
bt_payload->gp.get_extension(nvdla_dbb_ext);
cslDebug((50, "NV_NVDLA_cvif::WriteRequest_rbk2cvif, sending write command to rbk_wr_req_fifo_.\x0A"));
cslDebug((50, " addr: 0x%016lx\x0A", base_addr));
cslDebug((50, " size: %d\x0A", size_in_byte));
nvdla_dbb_ext->set_id(RBK_AXI_ID);
nvdla_dbb_ext->set_size(64);
nvdla_dbb_ext->set_length(size_in_byte/AXI_TRANSACTION_ATOM_SIZE);
// if (base_addr%AXI_TRANSACTION_ATOM_SIZE != 0) //Set length(in unit of 64B) to be same as RTL
// nvdla_dbb_ext->set_length(((size_in_byte - DMA_TRANSACTION_ATOM_SIZE) + DMA_TRANSACTION_ATOM_SIZE)/AXI_TRANSACTION_ATOM_SIZE);
// else // base_addr is aligned to 64Bytes
// nvdla_dbb_ext->set_length((size_in_byte + DMA_TRANSACTION_ATOM_SIZE)/AXI_TRANSACTION_ATOM_SIZE-1);
// write payload to arbiter fifo
rbk_wr_req_fifo_->write(bt_payload);
// When the last split req is sent to ext, write true to rbk_wr_required_ack_fifo_ when ack is required.
if (cur_address >= (payload_addr + payload_size)) {
if(rbk_wr_req->require_ack!=0) {
cslDebug((50, "NV_NVDLA_cvif::WriteRequest_rbk2cvif, require ack.\x0A"));
rbk_wr_required_ack_fifo_->write(true);
}
else {
cslDebug((50, "NV_NVDLA_cvif::WriteRequest_rbk2cvif, does not require ack.\x0A"));
rbk_wr_required_ack_fifo_->write(false);
}
}
else {
cslDebug((50, "NV_NVDLA_cvif::WriteRequest_rbk2cvif, does not require ack.\x0A"));
rbk_wr_required_ack_fifo_->write(false);
}
}
delete rbk_wr_req;
cslDebug((50, "NV_NVDLA_cvif::WriteRequest_rbk2cvif, write command processing done\x0A"));
}
}
这段代码是WriteRequest_rbk2cvif函数的后续部分,完成每个AXI事务的收尾,并根据是否需要ACK向rbk_wr_required_ack_fifo_写入信息。具体步骤如下:
- 如果当前是首个事务且起始地址不以64字节对齐,则将base_addr加上一个DMA原子大小(32字节),使事务按DBB_PV的要求以未对齐的原始地址发出(该补齐原子的数据已在前面被屏蔽)。
- 调用configure_gp配置bt_payload的地址、大小和读写方向。
- 获取nvdla_dbb_ext扩展,用于设置AXI事务的ID、大小和长度。
- 将bt_payload写入rbk_wr_req_fifo_,把生成的AXI事务加入FIFO队列,以便之后发送到外部接口。
- 检查当前是否为该请求的最后一个拆分事务:如果是,则根据请求是否需要ACK向rbk_wr_required_ack_fifo_写入true或false;否则写入false。
- 处理完所有拆分事务后,删除rbk_wr_req对象,释放内存。
这段代码主要用于配置AXI事务属性并将其写入FIFO,以及决定是否需要写确认,从而完成从RBK到CVIF的一次写请求处理。