NV_NVDLA_cvif.cpp
sdp_e2cvif_rd_req_b_transport
// Handles one DMA read request on the sdp_e -> cvif request path and splits it
// into AXI read transactions.
//
// Parameters:
//   ID      - not referenced anywhere in this function.
//   payload - DMA read command; only pd.dma_read_cmd.addr and
//             pd.dma_read_cmd.size are read.
//   delay   - not referenced anywhere in this function.
//
// Effects (performed by the code that follows this set-up section):
//   - writes the number of requested DMA atoms to sdp_e2cvif_rd_req_atom_num_fifo_
//   - writes one bool per DMA atom of every AXI transaction to
//     sdp_e_rd_atom_enable_fifo_ (true = atom was requested, false = padding)
//   - writes one heap-allocated dla_b_transport_payload per AXI transaction to
//     sdp_e_rd_req_fifo_
void NV_NVDLA_cvif::sdp_e2cvif_rd_req_b_transport(int ID, nvdla_dma_rd_req_t* payload, sc_time& delay) {
// DMA request size unit is 32 bytes
uint64_t base_addr;
uint64_t first_base_addr;
uint64_t last_base_addr;
uint64_t cur_address;
uint32_t size_in_byte;
uint32_t total_axi_size;
uint64_t payload_addr;
uint32_t payload_size;
uint8_t* axi_byte_enable_ptr;
uint32_t byte_iter;
bool is_base_64byte_align;
bool is_rear_64byte_align;
bool is_read=true;
nvdla_dbb_extension *nvdla_dbb_ext = NULL;
dla_b_transport_payload *bt_payload;
// Decode the command. The size field is stored as (number of DMA atoms - 1),
// hence the +1 before scaling to bytes.
payload_addr = payload->pd.dma_read_cmd.addr;
payload_size = (payload->pd.dma_read_cmd.size + 1) * DMA_TRANSACTION_ATOM_SIZE; //payload_size's max value is 2^13
// The start is "aligned" when it is a multiple of the AXI atom size.
is_base_64byte_align = payload_addr%AXI_TRANSACTION_ATOM_SIZE == 0;
// When the start is unaligned, step back by one DMA atom.
// NOTE(review): this only lands on an AXI-atom boundary if payload_addr is
// DMA-atom aligned and the AXI atom is exactly two DMA atoms (the comments
// here speak of 32B and 64B) - confirm against the constant definitions.
first_base_addr = is_base_64byte_align? payload_addr: payload_addr - DMA_TRANSACTION_ATOM_SIZE; // Align to 64B
cur_address = is_base_64byte_align? payload_addr: payload_addr - DMA_TRANSACTION_ATOM_SIZE; // Align to 64B
// The end is "aligned" when the first byte past the request is a multiple of the AXI atom size.
is_rear_64byte_align = (payload_addr + payload_size) % AXI_TRANSACTION_ATOM_SIZE == 0;
// Padded size: the requested bytes plus one DMA atom of padding for each unaligned end.
total_axi_size = payload_size + (is_base_64byte_align? 0: DMA_TRANSACTION_ATOM_SIZE) + (is_rear_64byte_align? 0: DMA_TRANSACTION_ATOM_SIZE);
// Start address of the last AXI atom covered by the padded request.
last_base_addr = first_base_addr + total_axi_size - AXI_TRANSACTION_ATOM_SIZE;
cslDebug((30, "NV_NVDLA_cvif::sdp_e2cvif_rd_req_b_transport, first_base_addr=0x%lx last_base_addr=0x%lx total_axi_size is 0x%x payload_addr=0x%lx payload_size=0x%x\x0A", first_base_addr, last_base_addr, total_axi_size, payload_addr, payload_size));
// Disabled alternative computation of last_base_addr based on
// CVIF_MAX_MEM_TRANSACTION_SIZE, kept for reference:
// if ( total_axi_size <= (first_base_addr % (CVIF_MAX_MEM_TRANSACTION_SIZE)) ) {
// last_base_addr = first_base_addr;
// } else if ((first_base_addr + total_axi_size)%CVIF_MAX_MEM_TRANSACTION_SIZE != 0) {
// if (total_axi_size >= (first_base_addr + total_axi_size)%CVIF_MAX_MEM_TRANSACTION_SIZE) {
// last_base_addr = first_base_addr + total_axi_size - (first_base_addr + total_axi_size)%CVIF_MAX_MEM_TRANSACTION_SIZE;
// } else {
// last_base_addr = first_base_addr;
// }
// } else {
// if (total_axi_size >= CVIF_MAX_MEM_TRANSACTION_SIZE) {
// last_base_addr = first_base_addr + total_axi_size - CVIF_MAX_MEM_TRANSACTION_SIZE;
// } else {
// last_base_addr = first_base_addr;
// }
// }
// Initial value only; base_addr is reassigned from cur_address at the top of
// every iteration of the split loop.
base_addr = first_base_addr;
这段代码与之前提供的代码类似,负责处理来自SDP E模块的DMA读取请求。以下是代码的主要步骤:
- 首先,从传入的`payload`中获取DMA请求的相关信息,包括起始地址(`payload_addr`)和请求的数据大小(`payload_size`)。`payload_size`的计算方式为:(请求中的size字段 + 1)乘以DMA事务原子大小(`DMA_TRANSACTION_ATOM_SIZE`)。
- 接下来,计算起始地址是否按照64字节对齐(`is_base_64byte_align`),如果不是,则将`first_base_addr`设置为`payload_addr - DMA_TRANSACTION_ATOM_SIZE`,以确保对齐。同时,初始化`cur_address`为`first_base_addr`。
- 计算结束地址是否按照64字节对齐(`is_rear_64byte_align`),如果不是,则将`total_axi_size`增加`DMA_TRANSACTION_ATOM_SIZE`,以确保对齐。计算`last_base_addr`为`first_base_addr + total_axi_size - AXI_TRANSACTION_ATOM_SIZE`。
- 初始化`base_addr`为`first_base_addr`,准备开始处理DMA读取请求。
- 计算并写入DMA请求的原子数量到`sdp_e2cvif_rd_req_atom_num_fifo_`中,以表示请求的原子数。
- 进入循环,以AXI事务原子的大小(`AXI_TRANSACTION_ATOM_SIZE`)为单位拆分DMA请求,以便于AXI传输;每个AXI事务不会跨越`CVIF_MAX_MEM_TRANSACTION_SIZE`的边界。
- 创建一个`dla_b_transport_payload`对象(`bt_payload`),该对象用于传输数据。获取用于设置字节使能的指针(`axi_byte_enable_ptr`)。
- 遍历当前AXI事务的`size_in_byte`个字节(`size_in_byte`是`AXI_TRANSACTION_ATOM_SIZE`的整数倍),为每个字节设置使能或禁用。在每个DMA原子的起始字节位置(即`byte_iter`是`DMA_TRANSACTION_ATOM_SIZE`的整数倍时),将该原子的使能信号写入相应的`*_rd_atom_enable_fifo_`中。
- 配置`bt_payload`的通用属性,包括地址、大小、读写标志。
- 获取`nvdla_dbb_extension`扩展对象,设置ID、大小和长度等属性。
- 检查`size_in_byte`是否是`AXI_TRANSACTION_ATOM_SIZE`的倍数,如果不是,则调用`FAIL`报告错误。
- 将`bt_payload`发送到`sdp_e_rd_req_fifo_`中,以便于传输给上游模块。
- 更新循环变量,继续处理下一个AXI事务,直到完成整个DMA请求。

整个流程的目标是将SDP E模块的DMA读取请求拆分成AXI事务,并将每个事务的数据和控制信息传递给上游模块以执行实际的DMA读取操作。
// Push the number of DMA atoms the client actually asked for. Padding atoms
// are excluded: the count is derived from payload_size, not total_axi_size.
// sdp_e2cvif_rd_req_atom_num_fifo_->write(total_axi_size/DMA_TRANSACTION_ATOM_SIZE);
sdp_e2cvif_rd_req_atom_num_fifo_->write(payload_size/DMA_TRANSACTION_ATOM_SIZE);
cslDebug((50, "NV_NVDLA_cvif::sdp_e2cvif_rd_req_b_transport, write 0x%x to sdp_e2cvif_rd_req_atom_num_fifo_.\x0A", payload_size/DMA_TRANSACTION_ATOM_SIZE));
// Split the DMA request into AXI requests. Each pass of this loop builds and
// sends exactly one AXI transaction starting at cur_address.
cslDebug((50, "NV_NVDLA_cvif::sdp_e2cvif_rd_req_b_transport, before spliting DMA transaction\x0A"));
while(cur_address <= last_base_addr) {
base_addr = cur_address;
size_in_byte = AXI_TRANSACTION_ATOM_SIZE;
cslDebug((50, "NV_NVDLA_cvif::sdp_e2cvif_rd_req_b_transport, prepare AXI transaction for address: 0x%lx\x0A", base_addr));
// base_addr should be aligned to 64B
// size_in_byte should be multiple of 64
// if the dma master only needs 32B, the extra 32B is dropped when the AXI data returns
// Disabled earlier ways of sizing the transaction, kept for reference:
// size_in_byte = total_axi_size > (CVIF_MAX_MEM_TRANSACTION_SIZE) ? CVIF_MAX_MEM_TRANSACTION_SIZE : total_axi_size;
// size_in_byte = (total_axi_size - base_addr % (CVIF_MAX_MEM_TRANSACTION_SIZE)) < (CVIF_MAX_MEM_TRANSACTION_SIZE) ? (total_axi_size - base_addr % (CVIF_MAX_MEM_TRANSACTION_SIZE)) : CVIF_MAX_MEM_TRANSACTION_SIZE;
// if (base_addr % CVIF_MAX_MEM_TRANSACTION_SIZE == 0) {
// size_in_byte = total_axi_size > CVIF_MAX_MEM_TRANSACTION_SIZE?CVIF_MAX_MEM_TRANSACTION_SIZE:total_axi_size;
// } else if ( total_axi_size > (base_addr % CVIF_MAX_MEM_TRANSACTION_SIZE)) {
// size_in_byte = base_addr % CVIF_MAX_MEM_TRANSACTION_SIZE;
// } else {
// size_in_byte = total_axi_size;
// }
// Grow the transaction one AXI atom at a time while the next atom is still
// inside the padded request and does not start on a
// CVIF_MAX_MEM_TRANSACTION_SIZE boundary. After this loop cur_address is the
// start of the last atom included in the transaction.
while (((cur_address + AXI_TRANSACTION_ATOM_SIZE) < (first_base_addr + total_axi_size)) && ((cur_address + AXI_TRANSACTION_ATOM_SIZE) % CVIF_MAX_MEM_TRANSACTION_SIZE != 0)) {
size_in_byte += AXI_TRANSACTION_ATOM_SIZE;
cur_address += AXI_TRANSACTION_ATOM_SIZE;
}
// start address of next axi transaction
cur_address += AXI_TRANSACTION_ATOM_SIZE;
cslDebug((50, "NV_NVDLA_cvif::sdp_e2cvif_rd_req_b_transport, cur_address=0x%lx base_addr=0x%lx size_in_byte=0x%x\x0A", cur_address, base_addr, size_in_byte));
// Allocating memory for dla_b_transport_payload
// The payload is not deleted in this function; it is handed to
// sdp_e_rd_req_fifo_ later in this iteration.
bt_payload = new dla_b_transport_payload(size_in_byte, dla_b_transport_payload::DLA_B_TRANSPORT_PAYLOAD_TYPE_MC);
axi_byte_enable_ptr = bt_payload->gp.get_byte_enable_ptr();
// Setup byte enable in payload. Whole 64B AXI atoms are always read; the unneeded 32B is dropped
// Write true to *_rd_atom_enable_fifo_ when the 32B atom is needed by dma client
// Write false to *_rd_atom_enable_fifo_ when the 32B atom is not needed by dma client (dma's addr is not aligned to 64B)
// Disabled previous implementation of the byte-enable set-up, kept for reference:
// for (byte_iter=0; byte_iter < size_in_byte; byte_iter++) {
// if(base_addr == last_base_addr) { // compare with last_base_addr before first_base_addr for the case of only one axi transaction
// if ( (((true == is_rear_64byte_align) || (byte_iter < (size_in_byte - DMA_TRANSACTION_ATOM_SIZE))) && (first_base_addr != base_addr)) ||
// ( ( ( (true == is_base_64byte_align) && (byte_iter < DMA_TRANSACTION_ATOM_SIZE) ) || ( (true == is_rear_64byte_align) && (byte_iter >= DMA_TRANSACTION_ATOM_SIZE)) ) && (first_base_addr == base_addr)) ){
// axi_byte_enable_ptr[byte_iter] = TLM_BYTE_ENABLED;
// if (0 == byte_iter%DMA_TRANSACTION_ATOM_SIZE) {
// cslDebug((50, "NV_NVDLA_cvif::sdp_e2cvif_rd_req_b_transport, write true to sdp_e_rd_atom_enable_fifo_\x0A"));
// sdp_e_rd_atom_enable_fifo_->write(true);
// }
// } else {
// axi_byte_enable_ptr[byte_iter] = TLM_BYTE_DISABLED;
// if (0 == byte_iter%DMA_TRANSACTION_ATOM_SIZE) {
// cslDebug((50, "NV_NVDLA_cvif::sdp_e2cvif_rd_req_b_transport, write false to sdp_e_rd_atom_enable_fifo_\x0A"));
// sdp_e_rd_atom_enable_fifo_->write(false);
// }
// }
// }
// else if(base_addr == first_base_addr) {
// if ( (true == is_base_64byte_align) || (byte_iter >= DMA_TRANSACTION_ATOM_SIZE) ){
// axi_byte_enable_ptr[byte_iter] = TLM_BYTE_ENABLED;
// if (0 == byte_iter%DMA_TRANSACTION_ATOM_SIZE) {
// cslDebug((50, "NV_NVDLA_cvif::sdp_e2cvif_rd_req_b_transport, write true to sdp_e_rd_atom_enable_fifo_\x0A"));
// sdp_e_rd_atom_enable_fifo_->write(true);
// }
// } else {
// axi_byte_enable_ptr[byte_iter] = TLM_BYTE_DISABLED;
// if (0 == byte_iter%DMA_TRANSACTION_ATOM_SIZE) {
// cslDebug((50, "NV_NVDLA_cvif::sdp_e2cvif_rd_req_b_transport, write false to sdp_e_rd_atom_enable_fifo_\x0A"));
// sdp_e_rd_atom_enable_fifo_->write(false);
// }
// }
// }
// else {
// axi_byte_enable_ptr[byte_iter] = TLM_BYTE_ENABLED;
// if (0 == byte_iter%DMA_TRANSACTION_ATOM_SIZE) {
// cslDebug((50, "NV_NVDLA_cvif::sdp_e2cvif_rd_req_b_transport, write true to sdp_e_rd_atom_enable_fifo_\x0A"));
// sdp_e_rd_atom_enable_fifo_->write(true);
// }
// }
// }
// Active implementation. Every byte of the transaction gets a byte enable,
// and on the first byte of each DMA atom one bool is pushed to
// sdp_e_rd_atom_enable_fifo_ telling whether that atom was requested.
for (byte_iter=0; byte_iter < size_in_byte; byte_iter++) {
if ( (base_addr == first_base_addr) && (false == is_base_64byte_align) && (byte_iter < DMA_TRANSACTION_ATOM_SIZE)) {
// Disable 1st DMA atom of the unaligned first_base_addr
// NOTE(review): the debug text below says "write true" although false is written.
axi_byte_enable_ptr[byte_iter] = TLM_BYTE_DISABLED; // leading padding atom: masked out
if (0 == byte_iter%DMA_TRANSACTION_ATOM_SIZE) {
cslDebug((50, "NV_NVDLA_cvif::sdp_e2cvif_rd_req_b_transport, write true to sdp_e_rd_atom_enable_fifo_\x0A"));
sdp_e_rd_atom_enable_fifo_->write(false);
}
} else if (( (base_addr + size_in_byte) == (last_base_addr+AXI_TRANSACTION_ATOM_SIZE)) && (false == is_rear_64byte_align) && (byte_iter >= size_in_byte - DMA_TRANSACTION_ATOM_SIZE)) {
// Disable 2nd DMA atom of the unaligned last_base_addr. The first condition
// is true only for the transaction that ends at the end of the padded request.
// NOTE(review): the debug text below says "write true" although false is written.
axi_byte_enable_ptr[byte_iter] = TLM_BYTE_DISABLED; // trailing padding atom: masked out
if (0 == byte_iter%DMA_TRANSACTION_ATOM_SIZE) {
cslDebug((50, "NV_NVDLA_cvif::sdp_e2cvif_rd_req_b_transport, write true to sdp_e_rd_atom_enable_fifo_\x0A"));
sdp_e_rd_atom_enable_fifo_->write(false);
}
} else {
axi_byte_enable_ptr[byte_iter] = TLM_BYTE_ENABLED; // requested data: enabled
if (0 == byte_iter%DMA_TRANSACTION_ATOM_SIZE) {
cslDebug((50, "NV_NVDLA_cvif::sdp_e2cvif_rd_req_b_transport, write true to sdp_e_rd_atom_enable_fifo_\x0A"));
sdp_e_rd_atom_enable_fifo_->write(true);
}
}
}
cslDebug((50, "NV_NVDLA_cvif::sdp_e2cvif_rd_req_b_transport, TLM_BYTE_ENABLE is done\x0A"));
这段代码主要完成以下操作:
- 将DMA读取请求的原子数量(`payload_size`除以`DMA_TRANSACTION_ATOM_SIZE`)写入`sdp_e2cvif_rd_req_atom_num_fifo_`,以指示请求的原子数量。
- 进入循环,拆分DMA请求为AXI请求。每个AXI请求包括一个起始地址(`base_addr`)和一个大小(`size_in_byte`)。
- 在循环中,首先为当前AXI请求分配一个`dla_b_transport_payload`对象(`bt_payload`),并获取用于设置字节使能的指针(`axi_byte_enable_ptr`)。
- 通过遍历当前AXI请求的`size_in_byte`个字节,设置字节使能。根据DMA请求的对齐情况和字节位置,将字节标记为使能或禁用。同时,在每个DMA原子的起始字节位置(即`byte_iter`是`DMA_TRANSACTION_ATOM_SIZE`的整数倍时),将该原子的使能标志写入相应的`*_rd_atom_enable_fifo_`中。
- 配置`bt_payload`的通用属性,包括地址、大小和读写标志。
- 继续下一个AXI请求,直到处理完整个DMA请求。

这段代码的目的是将SDP E模块的DMA读取请求拆分成多个AXI事务,每个AXI事务包含数据和控制信息,以便传输给上游模块进行处理。AXI字节的使能信息根据对齐情况和字节位置进行设置,以确保正确的数据传输。
// Fill in the generic payload: start address, byte count, and the read flag
// (is_read is initialised to true and never modified in this function).
bt_payload->configure_gp(base_addr, size_in_byte, is_read);
// Fetch the nvdla_dbb_extension attached to the generic payload.
// NOTE(review): the returned pointer is dereferenced without a null check;
// presumably the payload constructor always attaches the extension - confirm.
bt_payload->gp.get_extension(nvdla_dbb_ext);
nvdla_dbb_ext->set_id(SDP_E_AXI_ID);
// Hard-coded 64; presumably the AXI atom size in bytes (the comments in this
// function describe AXI atoms as 64B) - confirm it matches AXI_TRANSACTION_ATOM_SIZE.
nvdla_dbb_ext->set_size(64);
// Number of AXI atoms contained in this transaction.
nvdla_dbb_ext->set_length(size_in_byte/AXI_TRANSACTION_ATOM_SIZE);
// Defensive check: size_in_byte starts at AXI_TRANSACTION_ATOM_SIZE and only
// grows in steps of that size, so this branch is not expected to trigger.
// The CTC pragmas presumably exclude it from coverage measurement.
#pragma CTC SKIP
if(size_in_byte%AXI_TRANSACTION_ATOM_SIZE!=0) {
FAIL(("NV_NVDLA_cvif::sdp_e2cvif_rd_req_b_transport, size_in_byte is not multiple of AXI_TRANSACTION_ATOM_SIZE. size_in_byte=0x%x", size_in_byte));
}
#pragma CTC ENDSKIP
cslDebug((50, "NV_NVDLA_cvif::sdp_e2cvif_rd_req_b_transport, before sending data to sdp_e_rd_req_fifo_ addr=0x%lx\x0A", base_addr));
// Hand the payload to the request fifo. It is not deleted here, so the reader
// of the fifo is presumably responsible for freeing it - confirm.
sdp_e_rd_req_fifo_->write(bt_payload);
cslDebug((50, "NV_NVDLA_cvif::sdp_e2cvif_rd_req_b_transport, after sending data to sdp_e_rd_req_fifo_ addr=0x%lx\x0A", base_addr));
// Disabled bookkeeping from an earlier version; the loop now advances through cur_address.
// total_axi_size -= size_in_byte;
// base_addr += size_in_byte;
}
cslDebug((50, "NV_NVDLA_cvif::sdp_e2cvif_rd_req_b_transport, after spliting DMA transaction\x0A"));
}
这段代码是将拆分后的AXI请求数据打包成`bt_payload`,并将其传递给`sdp_e_rd_req_fifo_`以进行传输。以下是主要步骤:
- 使用`bt_payload`对象的`configure_gp`方法配置通用属性,包括起始地址(`base_addr`)、传输大小(`size_in_byte`)和读写标志(`is_read`)。
- 通过`bt_payload`对象的`gp.get_extension`方法获取`nvdla_dbb_ext`扩展对象,该对象用于设置AXI请求的扩展属性。
- 设置`nvdla_dbb_ext`的属性,包括ID(`SDP_E_AXI_ID`)、大小(64,表示每个AXI原子为64字节)和长度(`size_in_byte`除以`AXI_TRANSACTION_ATOM_SIZE`,表示本次AXI事务包含的AXI原子数)。
- 在发送AXI请求之前,检查`size_in_byte`是否是`AXI_TRANSACTION_ATOM_SIZE`的倍数。如果不是,将会触发一个错误。
- 将`bt_payload`对象写入`sdp_e_rd_req_fifo_`以进行传输。
- 循环继续处理下一个AXI请求,直到处理完整个DMA请求。

最后,代码在循环结束后输出调试信息,指示DMA事务的拆分已完成。
这段代码的目的是将拆分后的AXI请求打包成适当格式的`bt_payload`对象,并将其传递给FIFO进行传输,以便将请求发送到SDP E模块的上游。
ReadResp_cvif2sdp_e
// Read-response loop for the sdp_e client: reassembles DMA atoms returned by
// the memory side into DMA read responses and forwards them.
//
// Runs forever. Each iteration:
//   1. If the previous request is finished (atom_num_left == 0), reads the atom
//      count of the next request from sdp_e2cvif_rd_req_atom_num_fifo_.
//   2. Reads one atom from cvif2sdp_e_rd_rsp_fifo_ into the lower half of the
//      response data and, if the request still has atoms left, a second atom
//      into the upper half. The mask is 0x1 for one atom and 0x3 for two.
//   3. Forwards the response through
//      NV_NVDLA_cvif_base::cvif2sdp_e_rd_rsp_b_transport and frees it.
//
// credit_cvif2sdp_e_rd_rsp_fifo_ is incremented once per atom consumed.
// Atom buffers obtained from the fifo are released with delete[] here, so the
// producer is expected to allocate them with new[].
//
// NOTE(review): an atom count of 0 read from the fifo would make
// atom_num_left wrap around on the first decrement; the request side visible
// in this file always writes (size + 1) atoms, i.e. at least 1.
void NV_NVDLA_cvif::ReadResp_cvif2sdp_e() {
    uint8_t *axi_atom_ptr;
    uint32_t atom_num_left = 0;
    nvdla_dma_rd_rsp_t* dma_rd_rsp_payload = NULL;
    uint8_t *dma_payload_data_ptr;
    uint32_t idx;

    while(true) {
        if(0 == atom_num_left) {
            atom_num_left = sdp_e2cvif_rd_req_atom_num_fifo_->read();
            cslDebug((50, "NV_NVDLA_cvif::ReadResp_cvif2sdp_e, update atom_num_left from sdp_e2cvif_rd_req_atom_num_fifo_, atom_num_left is 0x%x\x0A", atom_num_left));
        }
        cslDebug((50, "NV_NVDLA_cvif::ReadResp_cvif2sdp_e, atom_num_left is 0x%x\x0A", atom_num_left));

        dma_rd_rsp_payload = new nvdla_dma_rd_rsp_t;
        dma_payload_data_ptr = reinterpret_cast <uint8_t *> (dma_rd_rsp_payload->pd.dma_read_data.data);
        dma_rd_rsp_payload->pd.dma_read_data.mask = 0x00;
        // Zero the whole data buffer up front. When a request ends on an odd
        // atom the upper half is never written, yet it is still dumped by the
        // debug loop below and forwarded with the response; without this it
        // would hold indeterminate heap contents.
        memset(dma_payload_data_ptr, 0, sizeof(dma_rd_rsp_payload->pd.dma_read_data.data));

        // 1st atom of the 64B
        // Aligned to 32B
        axi_atom_ptr = cvif2sdp_e_rd_rsp_fifo_->read();
        credit_cvif2sdp_e_rd_rsp_fifo_ ++;
        cslDebug((50, "NV_NVDLA_cvif::ReadResp_cvif2sdp_e, read 1st atom of the 64B from cvif2sdp_e_rd_rsp_fifo_\x0A"));
        atom_num_left--;
        dma_rd_rsp_payload->pd.dma_read_data.mask = 0x1;
        memcpy (&dma_payload_data_ptr[0], axi_atom_ptr, DMA_TRANSACTION_ATOM_SIZE);
        delete[] axi_atom_ptr;

        if(atom_num_left>0) {
            // 2nd atom of the 64B
            axi_atom_ptr = cvif2sdp_e_rd_rsp_fifo_->read();
            credit_cvif2sdp_e_rd_rsp_fifo_ ++;
            cslDebug((50, "NV_NVDLA_cvif::ReadResp_cvif2sdp_e, read 2nd atom of the 64B from cvif2sdp_e_rd_rsp_fifo_\x0A"));
            atom_num_left--;
            // Shift in a second valid bit: 0x1 -> 0x3.
            dma_rd_rsp_payload->pd.dma_read_data.mask = (dma_rd_rsp_payload->pd.dma_read_data.mask << 0x1) + 0x1;
            memcpy (&dma_payload_data_ptr[DMA_TRANSACTION_ATOM_SIZE], axi_atom_ptr, DMA_TRANSACTION_ATOM_SIZE);
            delete[] axi_atom_ptr;
        }

        cslDebug((70, "NV_NVDLA_cvif::ReadResp_cvif2sdp_e, dma_rd_rsp_payload->pd.dma_read_data.mask is 0x%x\x0A", uint32_t(dma_rd_rsp_payload->pd.dma_read_data.mask)));
        cslDebug((70, "NV_NVDLA_cvif::ReadResp_cvif2sdp_e, dma_rd_rsp_payload->pd.dma_read_data.data are :\x0A"));
        for (idx = 0; idx < sizeof(dma_rd_rsp_payload->pd.dma_read_data.data)/sizeof(dma_rd_rsp_payload->pd.dma_read_data.data[0]); idx++) {
            cslDebug((70, "    0x%lx\x0A", uint64_t (dma_rd_rsp_payload->pd.dma_read_data.data[idx])));   // The size of data[idx] is 8bytes and its type is "unsigned long"
        }

        cslDebug((50, "NV_NVDLA_cvif::ReadResp_cvif2sdp_e, before NV_NVDLA_cvif_base::cvif2sdp_e_rd_rsp_b_transport(dma_rd_rsp_payload, dma_delay_)\x0A"));
        NV_NVDLA_cvif_base::cvif2sdp_e_rd_rsp_b_transport(dma_rd_rsp_payload, dma_delay_);
        cslDebug((50, "NV_NVDLA_cvif::ReadResp_cvif2sdp_e, after NV_NVDLA_cvif_base::cvif2sdp_e_rd_rsp_b_transport(dma_rd_rsp_payload, dma_delay_)\x0A"));
        // The response is freed as soon as the transport call returns.
        delete dma_rd_rsp_payload;
    }
}
这段代码是用于从CVIF模块读取SDP E模块的DMA读取响应数据并处理响应的函数。以下是主要步骤:
- 初始化`atom_num_left`,该变量用于跟踪尚未处理的DMA响应中的原子数。
- 进入无限循环,等待新的DMA响应。
- 如果`atom_num_left`为0,从`sdp_e2cvif_rd_req_atom_num_fifo_`中读取下一个DMA请求的原子数量,并更新`atom_num_left`。
- 新建一个DMA响应对象`dma_rd_rsp_payload`,取得其数据缓冲区的指针,并将掩码初始化为0。
- 从`cvif2sdp_e_rd_rsp_fifo_`中读取DMA响应数据的第一个原子,这个原子是64字节的AXI事务的一部分。然后将其存储在`dma_rd_rsp_payload`中,并更新掩码。
- 如果还有原子未读取完,继续读取下一个原子,更新`dma_rd_rsp_payload`中的数据和掩码。
- 打印调试信息,包括响应掩码和数据。
- 调用基类`NV_NVDLA_cvif_base`的`cvif2sdp_e_rd_rsp_b_transport`方法,将DMA响应传递给SDP E模块。
- 释放`dma_rd_rsp_payload`对象的内存。

这段代码的目的是从CVIF模块接收SDP E模块的DMA读取响应,并将响应传递给SDP E模块进行进一步处理。这样,CVIF模块可以作为数据传输通道,将DMA请求从SDP E模块传递到外部存储器,并将DMA响应从外部存储器传递回SDP E模块。