这两个 IP 实现高速的核心部分 PHY,由 xiphy 和 iob 构成。
其中 ddr 的 iob 根据管脚的不同 由 OBUF(ADDR or CTRL), IBUF, IOBUFE3(DQ or DM), OBUFDS (CK), IBUFDS , IOBUFDS(DQS) 等组成
// Per-pin I/O buffer selection. The 3-bit field IOBTYPE[bitNum*3+:3] decides which
// buffer primitive is instantiated for pin index bitNum. Because the branches contain
// module instantiations, this is a generate-time case.
// NOTE(review): the enclosing generate loop that defines bitNum, and the declarations of
// IOBTYPE, DRAM_TYPE, EN_LVAUX, BANK_TYPE, USE_DYNAMIC_DCI, DQS_BIAS and Vref, are outside
// this excerpt - confirm against the full file.
case (IOBTYPE[bitNum*3+:3])
3'b001: // ADDR or CTRL: output-only pin
begin
OBUF OBUF
(
.I(phy2iob_q_out_byte[bitNum])
,.O(iob_pin[bitNum])
);
// No input buffer on this pin, so the PHY-side input bit is driven to zero.
assign iob2phy_d_in_byte[bitNum] = '0;
end
3'b010: // input-only pin
begin
IBUF IBUF
(
.O(iob2phy_d_in_byte[bitNum])
,.I(iob_pin[bitNum])
);
end
3'b011:
begin // DQ or DM: bidirectional single-ended pin
// The buffer primitive and its VREF hookup depend on DRAM_TYPE, EN_LVAUX and BANK_TYPE.
if (DRAM_TYPE == "DDR4")
begin
if (EN_LVAUX == "TRUE")
begin
// DDR4 with EN_LVAUX == "TRUE": IOBUFE3 with the VREF port left unconnected.
IOBUFE3 IOBUF
(
.I(phy2iob_q_out_byte[bitNum])
,.T(phy2iob_t[bitNum])
,.O(iob2phy_d_in_byte[bitNum])
,.IO(iob_pin[bitNum])
,.OSC_EN (1'b0)
,.OSC (4'b0000)
// Termination disable follows the per-bit ODT signal only when USE_DYNAMIC_DCI == 1.
,.DCITERMDISABLE ((USE_DYNAMIC_DCI == 1) ? phy2iob_odt_out_byte[bitNum] : 1'b0)
,.IBUFDISABLE (1'b0)
);
end
else
begin
// DDR4 otherwise: same IOBUFE3, but VREF is driven by the Vref net.
IOBUFE3 IOBUF
(
.I(phy2iob_q_out_byte[bitNum])
,.T(phy2iob_t[bitNum])
,.O(iob2phy_d_in_byte[bitNum])
,.IO(iob_pin[bitNum])
,.VREF (Vref)
,.OSC_EN (1'b0)
,.OSC (4'b0000)
,.DCITERMDISABLE ((USE_DYNAMIC_DCI == 1) ? phy2iob_odt_out_byte[bitNum] : 1'b0)
,.IBUFDISABLE (1'b0)
);
end
end
else
begin
// Any DRAM_TYPE other than "DDR4": choose the primitive by bank type.
if (BANK_TYPE == "HP_IO")
begin
// HP bank: IOBUFE3 with VREF tied to constant 0.
IOBUFE3 IOBUF
(
.I(phy2iob_q_out_byte[bitNum])
,.T(phy2iob_t[bitNum])
,.O(iob2phy_d_in_byte[bitNum])
,.IO(iob_pin[bitNum])
,.VREF (1'b0)
,.OSC_EN (1'b0)
,.OSC (4'b0000)
,.DCITERMDISABLE ((USE_DYNAMIC_DCI == 1) ? phy2iob_odt_out_byte[bitNum] : 1'b0)
,.IBUFDISABLE (1'b0)
);
end
else if (BANK_TYPE == "HR_IO")
begin
// HR bank: IOBUF_INTERMDISABLE; INTERMDISABLE takes the same ODT-based control
// that DCITERMDISABLE takes in the IOBUFE3 branches.
IOBUF_INTERMDISABLE
#(
.SIM_DEVICE ("ULTRASCALE")
)
IOBUF
(
.I(phy2iob_q_out_byte[bitNum])
,.T(phy2iob_t[bitNum])
,.O(iob2phy_d_in_byte[bitNum])
,.IO(iob_pin[bitNum])
,.INTERMDISABLE ((USE_DYNAMIC_DCI == 1) ? phy2iob_odt_out_byte[bitNum] : 1'b0)
,.IBUFDISABLE (1'b0)
);
end
else
begin
// Any other BANK_TYPE: plain IOBUF with no termination/disable controls.
IOBUF IOBUF
(
.I(phy2iob_q_out_byte[bitNum])
,.T(phy2iob_t[bitNum])
,.O(iob2phy_d_in_byte[bitNum])
,.IO(iob_pin[bitNum])
);
end
end
end
3'b101: // CK: differential output pair
// Only the even index instantiates the buffer; iob_pin[bitNum+1] is its complement
// output. Nothing is generated for an odd bitNum of this type.
if (bitNum % 2 == 0)
begin // generate for even only
OBUFDS OBUFDS
(
.I(phy2iob_q_out_byte[bitNum])
,.O(iob_pin[bitNum])
,.OB(iob_pin[bitNum+1])
);
// Output-only pair, so the PHY-side input bit of the even index is driven to zero.
assign iob2phy_d_in_byte[bitNum] = '0;
end
3'b110: // differential input pair (even index only, bitNum+1 is the complement pin)
if (bitNum % 2 == 0)
begin
IBUFDS
#(.DQS_BIAS(DQS_BIAS))
IBUFDS
(
.O(iob2phy_d_in_byte[bitNum])
,.I(iob_pin[bitNum])
,.IB(iob_pin[bitNum+1])
);
end
3'b111: // DQS: bidirectional differential pair (even index only)
if (bitNum % 2 == 0)
begin
IOBUFDS #(.DQS_BIAS(DQS_BIAS))
IO_BUFDS
(
.I(phy2iob_q_out_byte[bitNum])
,.T(phy2iob_t[bitNum])
,.O(iob2phy_d_in_byte[bitNum])
,.IO(iob_pin[bitNum])
,.IOB(iob_pin[bitNum+1])
);
end
default: // every remaining IOBTYPE code (3'b000, 3'b100)
begin
// No IO buffer!
end
endcase
xiphy 由 txrx_bitslice / tx_bitslice / rx_bitslice 组成。xxx_bitslice 是一个 4:1 或 8:1 的串并转换 FIFO;给串行数据打拍的高速 clk 直接由 FIFO_RD_CLK 或 FIFO_WR_CLK 经 4 倍频或 8 倍频得到,不需要额外提供高速 clk;
此元器件即为实现高速的核心元件,在内部较低速度的并行逻辑下,实现接口的高速串行输出。
DQS或者CK信号都是由2倍速的串行高速clk打拍形成,为普通逻辑信号,所以可以实现上下沿同时采集,FIFO_RD_CLK 或者 FIFO_WR_CLK 为实际速率的1/8或1/4,
即传输速率为2400Mbps的DDR4,其串行clk频率即为2400M,其接口CK_T/CK_C的频率为1200M,内部逻辑的clk(FIFO_RD_CLK) 为300M ([7:0],1/8).
如下所示 rx_bitslice, 其中CLK是给LOAD,CE,INC 打拍的低速clk,如果不用delay功能,此clk可不接;
实际即为从 DATAIN 串行输入,Q[7:0] 为 FIFO 的输出,FIFO_RD_CLK, FIFO_RD_EN, FIFO_EMPTY 为指示信号
如果是tx_bitslice 或者 txrx_bitslice 则方向相反,
xx_bitslice 由 bitslice_control 控制
元器件库参考:
UltraScale Architecture Libraries Guide (UG974);UltraScale Architecture SelectIO Resources User Guide (UG571)
下面是 iodelay 元件的使用
使用 VAR_LOAD的 LOAD或者 VARIABLE 的 INC 功能时要求 EN_VTC = 0;
而在FIXED 模式时 IDELAYCTRL 没有rdy 之前,要求 EN_VTC =1;
每个 nibble 共用一个 IDELAYCTRL,即每个 bank 有 8 个 IDELAYCTRL;
当 iodelay 使用 TIME 模式,并且对应的 nibble 中没有例化过 bitslice 类型的元件时,应例化 IDELAYCTRL。(说明:bitslice 中已含有 IDELAYCTRL;bitslice 为 native primitive,iodelay 等为 component primitive;component primitive 由 native primitive 经特殊配置生成,用于等效老器件的元件。)
// ODELAYE3 bring-up test design.
//
// A differential system clock is buffered and fed to the clk_wiz_0 clocking IP. The
// resulting clk toggles three registers every cycle:
//   * out0_reg goes through an ODELAYE3 (TIME format, VAR_LOAD type) to pin out0;
//   * out1_reg / out2_reg drive out1 / out2 directly as undelayed references.
// The delay controls (RST, LOAD, INC, CE, CNTVALUEIN, EN_VTC) come from a VIO core and
// the current tap count (CNTVALUEOUT) plus IDELAYCTRL RDY are observable on VIO/ILA.
//
// Ports:
//   sysclk_p / sysclk_n : differential reference clock input
//   out0                : delayed toggle output (ODELAYE3 DATAOUT)
//   out1, out2          : undelayed toggle outputs
module project(
    input sysclk_p,
    input sysclk_n,
    // input rst,
    output out0,
    output out1,
    output out2
);

    wire clk_in;                 // buffered single-ended copy of sysclk_p/n
    wire clk;                    // clk_wiz_0 output; fabric, IDELAYCTRL and debug clock
    wire rdy;                    // IDELAYCTRL ready flag

    // Toggle sources. Initialised so that simulation does not start from X when RST
    // has not yet been pulsed from the VIO.
    reg out0_reg = 1'b0;
    reg out1_reg = 1'b0;
    reg out2_reg = 1'b0;

    wire [8:0] CNTVALUEOUT;      // current ODELAYE3 tap count
    wire [8:0] CNTVALUEIN;       // tap count to load, driven by the VIO
    wire       LOAD, INC, CE, RST, EN_VTC;   // delay/reset controls, driven by the VIO

    IBUFDS IBUFDS_refclk (
        .O(clk_in),     // 1-bit output: Buffer output
        .I(sysclk_p),   // 1-bit input: Diff_p buffer input (connect directly to top-level port)
        .IB(sysclk_n)   // 1-bit input: Diff_n buffer input (connect directly to top-level port)
    );

    // NOTE(review): `locked` is left unconnected, so nothing holds the logic or the
    // IDELAYCTRL in reset until the clock is stable; the design relies on RST being
    // pulsed manually from the VIO after start-up - confirm this is intended.
    clk_wiz_0 instance_name (
        // Clock out ports
        .clk_out1(clk),     // output clk_out1
        // Status and control signals
        .reset(1'b0),       // input reset
        .locked(),          // output locked
        // Clock in ports
        .clk_in1(clk_in)
    );

    // Free-running toggles: each register inverts on every clk edge, asynchronously
    // cleared by the VIO-driven RST.
    always @(posedge clk or posedge RST) begin
        if (RST) begin
            out0_reg <= 1'b0;
            out1_reg <= 1'b0;
            out2_reg <= 1'b0;
        end
        else begin
            out0_reg <= ~out0_reg;
            out1_reg <= ~out1_reg;
            out2_reg <= ~out2_reg;
        end
    end

    // Undelayed reference outputs (out0 is driven by the ODELAYE3 below).
    assign out1 = out1_reg;
    assign out2 = out2_reg;

    IDELAYCTRL #(
        .SIM_DEVICE("ULTRASCALE")   // Must be set to "ULTRASCALE"
    )
    IDELAYCTRL_inst (
        .RDY(rdy),        // 1-bit output: Ready output
        .REFCLK(clk),     // 1-bit input: Reference clock input
        .RST(RST)         // 1-bit input: Active high reset input. Asynchronous assert,
                          // synchronous deassert to REFCLK.
    );

    // NOTE(review): REFCLK_FREQUENCY must equal the real frequency of clk; the
    // clk_wiz_0 configuration is not visible here - confirm it produces 300 MHz.
    ODELAYE3 #(
        .CASCADE("NONE"),               // Cascade setting (MASTER, NONE, SLAVE_END, SLAVE_MIDDLE)
        .DELAY_FORMAT("TIME"),          // (COUNT, TIME)
        .DELAY_TYPE("VAR_LOAD"),        // Set the type of tap delay line (FIXED, VARIABLE, VAR_LOAD)
        .DELAY_VALUE('d1100),           // Output delay tap setting
        .IS_CLK_INVERTED(1'b0),         // Optional inversion for CLK
        .IS_RST_INVERTED(1'b0),         // Optional inversion for RST
        .REFCLK_FREQUENCY(300.0),       // IDELAYCTRL clock input frequency in MHz (200.0-2667.0).
        .SIM_DEVICE("ULTRASCALE_PLUS"), // Set the device version (ULTRASCALE, ULTRASCALE_PLUS,
                                        // ULTRASCALE_PLUS_ES1, ULTRASCALE_PLUS_ES2)
        .UPDATE_MODE("ASYNC")           // Determines when updates to the delay will take effect
                                        // (ASYNC, MANUAL, SYNC)
    )
    ODELAYE3_inst (
        .CASC_OUT(),                // 1-bit output: Cascade delay output to IDELAY input cascade
        .CNTVALUEOUT(CNTVALUEOUT),  // 9-bit output: Counter value output
        .DATAOUT(out0),             // 1-bit output: Delayed data from ODATAIN input port
        // CASCADE is "NONE", so the cascade inputs are unused; tie them low instead of
        // leaving input ports floating.
        .CASC_IN(1'b0),             // 1-bit input: Cascade delay input from slave IDELAY CASCADE_OUT
        .CASC_RETURN(1'b0),         // 1-bit input: Cascade delay returning from slave IDELAY DATAOUT
        .CE(CE),                    // 1-bit input: Active high enable increment/decrement input
        .CLK(clk),                  // 1-bit input: Clock input
        .CNTVALUEIN(CNTVALUEIN),    // 9-bit input: Counter value input
        .EN_VTC(EN_VTC),            // 1-bit input: Keep delay constant over VT
        .INC(INC),                  // 1-bit input: Increment/Decrement tap delay input
        .LOAD(LOAD),                // 1-bit input: Load DELAY_VALUE input
        .ODATAIN(out0_reg),         // 1-bit input: Data input
        .RST(~rdy)                  // 1-bit input: Asynchronous Reset to the DELAY_VALUE;
                                    // held asserted while IDELAYCTRL reports not ready
    );

    // Debug: capture the tap count and the ready flag.
    // NOTE(review): the generated template comment described probe0 as [0:0], but a
    // 9-bit bus is connected - confirm ila_0 probe0 is configured 9 bits wide.
    ila_0 ila_0 (
        .clk(clk),              // input wire clk
        .probe0(CNTVALUEOUT),   // input wire probe0 (9-bit tap count)
        .probe1(rdy)            // input wire probe1 (IDELAYCTRL RDY)
    );

    // Debug: all delay-line controls are driven interactively from the VIO.
    vio_0 vio_0 (
        .clk(clk),                  // input wire clk
        .probe_in0(CNTVALUEOUT),    // input wire [8 : 0] probe_in0
        .probe_out0(RST),           // output wire [0 : 0] probe_out0
        .probe_out1(LOAD),
        .probe_out2(INC),
        .probe_out3(CE),
        .probe_out4(CNTVALUEIN),
        .probe_out5(EN_VTC)
    );

endmodule