diff --git a/Bender.yml b/Bender.yml index 0c18234..f5c1501 100644 --- a/Bender.yml +++ b/Bender.yml @@ -23,6 +23,7 @@ sources: - rtl/basic/hwpe_stream_demux_static.sv - rtl/basic/hwpe_stream_deserialize.sv - rtl/basic/hwpe_stream_fence.sv + - rtl/basic/hwpe_stream_fence_asymmetric.sv - rtl/basic/hwpe_stream_merge.sv - rtl/basic/hwpe_stream_mux_static.sv - rtl/basic/hwpe_stream_serialize.sv @@ -32,6 +33,7 @@ sources: - rtl/streamer/hwpe_stream_addressgen.sv - rtl/streamer/hwpe_stream_addressgen_v2.sv - rtl/streamer/hwpe_stream_addressgen_v3.sv + - rtl/streamer/hwpe_stream_addressgen_v4.sv - rtl/streamer/hwpe_stream_sink_realign.sv - rtl/streamer/hwpe_stream_source_realign.sv - rtl/streamer/hwpe_stream_strbgen.sv diff --git a/rtl/basic/hwpe_stream_fence_asymmetric.sv b/rtl/basic/hwpe_stream_fence_asymmetric.sv new file mode 100644 index 0000000..8a816ce --- /dev/null +++ b/rtl/basic/hwpe_stream_fence_asymmetric.sv @@ -0,0 +1,165 @@ +/* + * hwpe_stream_fence_asymmetric.sv + * Arpan Suravi Prasad + * + * Copyright (C) 2014-2018 ETH Zurich, University of Bologna + * Copyright and related rights are licensed under the Solderpad Hardware + * License, Version 0.51 (the "License"); you may not use this file except in + * compliance with the License. You may obtain a copy of the License at + * http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law + * or agreed to in writing, software, hardware and materials distributed under + * this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR + * CONDITIONS OF ANY KIND, either express or implied. See the License for the + * specific language governing permissions and limitations under the License. + */ + +/** + * The **hwpe_stream_fence_asymmetric** module is used to synchronize the handshake between + * `NB_STREAMS` streams. 
+ * This is necessary, for example, when 2 asymmetric (different data width) streams are produced + * from separate TCDM accesses and have to be joined into a single, wider + * stream. `NB_STREAMS` is fixed to 2 in this module; when `bypass_i` is 1, each output `valid` follows only its own input `valid` (or its stored element) instead of the joint condition. + * + * .. _wavedrom_hwpe_stream_fence: + * .. wavedrom:: wavedrom/hwpe_stream_fence.json + * :width: 85 % + * :caption: Example of **hwpe_stream_fence** operation. + * + * .. tabularcolumns:: |l|l|J| + * .. _hwpe_stream_fence_asymmetric_params: + * .. table:: **hwpe_stream_fence_asymmetric** design-time parameters. + * + * +------------------+-------------+---------------------------------------------+ + * | **Name** | **Default** | **Description** | + * +------------------+-------------+---------------------------------------------+ + * | *ELEM_WIDTH_0/1* | 32 | Bits per strobe bit of stream 0 / stream 1. | + * +------------------+-------------+---------------------------------------------+ + * | *DATA_WIDTH_0/1* | 32 | Data width of stream 0 / stream 1. 
| + * +------------------+-------------+---------------------------------------------+ + */ + +import hwpe_stream_package::*; + +module hwpe_stream_fence_asymmetric #( + localparam int unsigned NB_STREAMS = 2, + parameter int unsigned ELEM_WIDTH_0 = 32, + parameter int unsigned ELEM_WIDTH_1 = 32, + parameter int unsigned DATA_WIDTH_0 = 32, + parameter int unsigned DATA_WIDTH_1 = 32 +) +( + input logic clk_i, + input logic rst_ni, + input logic clear_i, + input logic bypass_i, + + hwpe_stream_intf_stream.sink push_0_i, + hwpe_stream_intf_stream.sink push_1_i, + hwpe_stream_intf_stream.source pop_0_o, + hwpe_stream_intf_stream.source pop_1_o +); + + logic [NB_STREAMS-1:0] in_valid; + logic out_valid; + logic [NB_STREAMS-1:0] fence_state_q, fence_state_d; + logic [DATA_WIDTH_0-1:0] data_0_d, data_0_q; + logic [DATA_WIDTH_1-1:0] data_1_d, data_1_q; + logic [DATA_WIDTH_0/ELEM_WIDTH_0-1:0] strb_0_d, strb_0_q; + logic [DATA_WIDTH_1/ELEM_WIDTH_1-1:0] strb_1_d, strb_1_q; + + logic [NB_STREAMS-1:0] in_strm_hs, out_strm_hs; + + assign in_strm_hs[0] = push_0_i.ready & 
push_0_i.valid; + assign in_strm_hs[1] = push_1_i.ready & push_1_i.valid; + + assign out_strm_hs[0] = pop_0_o.ready & pop_0_o.valid; + assign out_strm_hs[1] = pop_1_o.ready & pop_1_o.valid; + + assign in_valid[0] = push_0_i.valid; + assign in_valid[1] = push_1_i.valid; + + // A new element can be accepted if nothing is registered, or if the registered element is handed over downstream in this cycle + assign push_0_i.ready = ~fence_state_q[0] || fence_state_q[0] & out_strm_hs[0]; + assign push_1_i.ready = ~fence_state_q[1] || fence_state_q[1] & out_strm_hs[1]; + + assign out_valid = &( fence_state_q | in_valid); + + assign pop_0_o.valid = bypass_i ? push_0_i.valid | fence_state_q[0] : out_valid; + assign pop_1_o.valid = bypass_i ? push_1_i.valid | fence_state_q[1] : out_valid; + + assign pop_0_o.data = fence_state_q[0] ? data_0_q: push_0_i.data; + assign pop_1_o.data = fence_state_q[1] ? data_1_q: push_1_i.data; + + assign pop_0_o.strb = fence_state_q[0] ? strb_0_q: push_0_i.strb; + assign pop_1_o.strb = fence_state_q[1] ? strb_1_q: push_1_i.strb; + + always_comb begin + fence_state_d[0] = fence_state_q[0]; + data_0_d = data_0_q; + strb_0_d = strb_0_q; + if(in_strm_hs[0] & out_strm_hs[0]) begin + fence_state_d[0] = fence_state_q[0]; + data_0_d = fence_state_q[0] ? push_0_i.data : data_0_q; + strb_0_d = fence_state_q[0] ? push_0_i.strb : strb_0_q; + end else if (in_strm_hs[0] & ~out_strm_hs[0]) begin + fence_state_d[0] = 1'b1; + data_0_d = push_0_i.data; + strb_0_d = push_0_i.strb; + end else if (~in_strm_hs[0] & out_strm_hs[0]) begin + fence_state_d[0] = 1'b0; + data_0_d = '0; + strb_0_d = '0; + end + end + + always_comb begin + fence_state_d[1] = fence_state_q[1]; + data_1_d = data_1_q; + strb_1_d = strb_1_q; + if(in_strm_hs[1] & out_strm_hs[1]) begin + fence_state_d[1] = fence_state_q[1]; + data_1_d = fence_state_q[1] ? push_1_i.data : data_1_q; + strb_1_d = fence_state_q[1] ? 
push_1_i.strb : strb_1_q; + end else if (in_strm_hs[1] & ~out_strm_hs[1]) begin + fence_state_d[1] = 1'b1; + data_1_d = push_1_i.data; + strb_1_d = push_1_i.strb; + end else if (~in_strm_hs[1] & out_strm_hs[1]) begin + fence_state_d[1] = 1'b0; + data_1_d = '0; + strb_1_d = '0; + end + end + + always_ff @(posedge clk_i or negedge rst_ni) + begin + if(~rst_ni) begin + data_0_q <= '0; + data_1_q <= '0; + strb_0_q <= '0; + strb_1_q <= '0; + end else if(clear_i) begin + data_0_q <= '0; + data_1_q <= '0; + strb_0_q <= '0; + strb_1_q <= '0; + end else begin + data_0_q <= data_0_d; + data_1_q <= data_1_d; + strb_0_q <= strb_0_d; + strb_1_q <= strb_1_d; + end + end + + + always_ff @(posedge clk_i or negedge rst_ni) + begin + if(~rst_ni) + fence_state_q <= '0; + else if(clear_i) + fence_state_q <= '0; + else + fence_state_q <= fence_state_d; + end + +endmodule // hwpe_stream_fence_asymmetric diff --git a/rtl/basic/hwpe_stream_serialize.sv b/rtl/basic/hwpe_stream_serialize.sv index ed8c388..89b8277 100644 --- a/rtl/basic/hwpe_stream_serialize.sv +++ b/rtl/basic/hwpe_stream_serialize.sv @@ -61,6 +61,9 @@ module hwpe_stream_serialize #( parameter int unsigned NB_IN_STREAMS = 2, parameter int unsigned CONTIG_LIMIT = 1024, parameter int unsigned DATA_WIDTH = 32, + parameter int unsigned ELEMENT_WIDTH = 8, + parameter int unsigned MODE = 0, + localparam int unsigned NUM_ELEMENTS = DATA_WIDTH/ELEMENT_WIDTH, parameter logic SYNC_READY = 1'b0 ) ( @@ -80,7 +83,7 @@ module hwpe_stream_serialize #( // boilerplate for SystemVerilog compliance logic [NB_IN_STREAMS-1:0][DATA_WIDTH-1:0] push_data; logic [NB_IN_STREAMS-1:0] push_valid; - logic [NB_IN_STREAMS-1:0][DATA_WIDTH/8-1:0] push_strb; + logic [NB_IN_STREAMS-1:0][NUM_ELEMENTS-1:0] push_strb; logic [NB_IN_STREAMS-1:0] push_ready; generate @@ -88,7 +91,7 @@ module hwpe_stream_serialize #( assign push_data [ii] = push_i[ii].data; assign push_strb [ii] = push_i[ii].strb; - assign push_valid[ii] = push_i[ii].valid; + assign push_valid[ii] = 
push_i[ii].valid & stream_cnt_en; assign push_i[ii].ready = push_ready[ii]; end @@ -99,9 +102,16 @@ module hwpe_stream_serialize #( assign pop_o.valid = push_valid[stream_cnt_q]; assign pop_o.strb = push_strb [stream_cnt_q]; + logic last_stream; + + if(MODE == 0) + assign last_stream = 1'b0; + else + assign last_stream = (contig_cnt_q == ctrl_i.nb_contig_m1 - 1) ? stream_cnt_en & pop_o.ready : 1'b0; + if(SYNC_READY) begin : sync_ready_gen for(genvar ii=0; ii not well supported by some tools + parameter int unsigned ELEMENT_WIDTH = 8; // by default a byte as the element width + parameter int unsigned STRB_WIDTH = DATA_WIDTH/ELEMENT_WIDTH; `ifndef SYNTHESIS // the TRVR assert is disabled by default, as it is only valid for zero-latency // accesses (e.g. using FIFO queues breaks this assumption) @@ -30,9 +33,9 @@ interface hwpe_stream_intf_tcdm ( logic gnt; logic [31:0] add; logic wen; - logic [3:0] be; - logic [31:0] data; - logic [31:0] r_data; + logic [STRB_WIDTH-1:0] be; + logic [DATA_WIDTH-1:0] data; + logic [DATA_WIDTH-1:0] r_data; logic r_valid; modport master ( @@ -71,7 +74,8 @@ interface hwpe_stream_intf_stream input logic clk ); parameter int unsigned DATA_WIDTH = 32; // used to default to -1 and always overridden --> not well supported by some tools - parameter int unsigned STRB_WIDTH = DATA_WIDTH/8; + parameter int unsigned ELEMENT_WIDTH = 8; // by default a byte as the element width + parameter int unsigned STRB_WIDTH = DATA_WIDTH/ELEMENT_WIDTH; `ifndef SYNTHESIS parameter bit BYPASS_VCR_ASSERT = 1'b0; parameter bit BYPASS_VDR_ASSERT = 1'b0; diff --git a/rtl/hwpe_stream_package.sv b/rtl/hwpe_stream_package.sv index a42fcc2..82b132d 100644 --- a/rtl/hwpe_stream_package.sv +++ b/rtl/hwpe_stream_package.sv @@ -64,6 +64,21 @@ package hwpe_stream_package; logic [2:0] dim_enable_1h; } ctrl_addressgen_v3_t; + typedef struct packed { + logic [31:0] base_addr; + logic [31:0] tot_len; // former word_length + logic [31:0] d0_len; // former line_length + logic signed 
[31:0] d0_stride; // former word_stride + logic [31:0] d1_len; // former block_length + logic signed [31:0] d1_stride; // former line_stride + logic [31:0] d2_len; // d2 length, compared against the d2 counter in hwpe_stream_addressgen_v4 + logic signed [31:0] d2_stride; // former block_stride + logic [31:0] d3_len; // d3 length, compared against the d3 counter in hwpe_stream_addressgen_v4 + logic signed [31:0] d3_stride; // d3 stride + logic signed [31:0] d4_stride; // d4 stride (outermost level; this struct carries no d4 length) + logic [3:0] dim_enable_1h; // bit k = 0 disables the d<k> length check in hwpe_stream_addressgen_v4 + } ctrl_addressgen_v4_t; + typedef struct packed { logic enable; logic strb_valid; @@ -94,6 +109,10 @@ package hwpe_stream_package; logic done; } flags_addressgen_v3_t; + typedef struct packed { + logic done; + } flags_addressgen_v4_t; + typedef struct packed { logic empty; logic full; diff --git a/rtl/streamer/hwpe_stream_addressgen_v4.sv b/rtl/streamer/hwpe_stream_addressgen_v4.sv new file mode 100644 index 0000000..4f66a51 --- /dev/null +++ b/rtl/streamer/hwpe_stream_addressgen_v4.sv @@ -0,0 +1,346 @@ +/* + * hwpe_stream_addressgen_v4.sv + * Francesco Conti + * + * Copyright (C) 2014-2020 ETH Zurich, University of Bologna + * Copyright and related rights are licensed under the Solderpad Hardware + * License, Version 0.51 (the "License"); you may not use this file except in + * compliance with the License. You may obtain a copy of the License at + * http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law + * or agreed to in writing, software, hardware and materials distributed under + * this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR + * CONDITIONS OF ANY KIND, either express or implied. See the License for the + * specific language governing permissions and limitations under the License. + */ + +/** + * The **hwpe_stream_addressgen_v4** module is used to generate addresses to + * load or store HWPE-Stream stream. In this version of the address generator, + * the address is itself carried within a HWPE-Stream, making it easily stallable. 
+ * The address generator can be used to generate address from an up to + * five-dimensional space, which can be visited with configurable strides in all + * five dimensions. + * + * The multiple loop functionality is partially overlapped by the functionality + * provided by the microcode processor `hwce_ctrl_ucode` that can be embedded + * in HWPEs. The latter is much more flexible and smaller, but less fast. + * + * One iteration is performed per each cycle when `enable_i` is 1 and the output + * `addr_o` stream is ready. `presample_i` should be 1 in the first cycle in which + * the address generator can start generating addresses, and no further. + * The following piece of pseudo-C code summarizes the basic functionality provided by + * the address generator. + * + * .. code-block:: C + * + * hwpe_stream_addressgen_v4( + * int base_addr, // base address (byte-aligned) + * int d0_len, int d1_len, int tot_len // d0,d1,total length (in number of transactions) + * int d0_stride, int d1_stride, int d2_stride, // d0,d1,d2 strides (in bytes) + * int *d0_addr, int *d1_addr, int *d2_addr, // d0,d1,d2 addresses (by reference) + * int *d0_cnt, int *d1_cnt, int *ov_cnt // d0,d1,overall counters (by reference) + * ) { + * // compute current address + * int current_addr = 0; + * int done = 0; + * if (dim_enable & 0x1 == 0) { // 1-dimensional streaming + * current_addr = base_addr + *d0_addr; + * } + * else if(dim_enable & 0x2 == 0) { // 2-dimensional streaming + * current_addr = base_addr + *d1_addr + *d0_addr; + * } + * else { // 3-dimensional streaming + * current_addr = base_addr + *d2_addr + *d1_addr + *d0_addr; + * } + * // update counters and dimensional addresses + * if(*ov_cnt == tot_len) { + * done = 1; + * } + * if((*d0_cnt < d0_len) || (dim_enable & 0x1 == 0)) { + * *d0_addr = *d0_addr + d0_stride; + * *d0_cnt = *d0_cnt + 1; + * } + * else if ((*d1_cnt < d1_len) || (dim_enable & 0x2 == 0)) { + * *d0_addr = 0; + * *d1_addr = *d1_addr + d1_stride; + * *d0_cnt = 1; + * 
*d1_cnt = *d1_cnt + 1; + * } + * else if ((*d2_cnt < d2_len) || (dim_enable & 0x4 == 0)) { + * *d0_addr = 0; + * *d1_addr = 0; + * *d2_addr = *d2_addr + d2_stride; + * *d0_cnt = 1; + * *d1_cnt = 1; + * *d2_cnt = *d2_cnt + 1; + * } + * else { + * *d0_addr = 0; + * *d1_addr = 0; + * *d2_addr = 0; + * *d3_addr = *d3_addr + d3_stride; + * *d0_cnt = 1; + * *d1_cnt = 1; + * *d2_cnt = 1; + * } + * *ov_cnt = *ov_cnt + 1; + * return current_addr, done; + * } + * + * .. tabularcolumns:: |l|l|J| + * .. _hwpe_stream_addressgen_v4_params: + * .. table:: **hwpe_stream_addressgen_v4** design-time parameters. + * + * +-------------------------+------------------------------------+---------------------------------------------------------------------------------------------+ + * | **Name** | **Default** | **Description** | + * +-------------------------+------------------------------------+---------------------------------------------------------------------------------------------+ + * | *TRANS_CNT* | 32 | Number of bits supported in the transaction counter, which will overflow at 2^ `TRANS_CNT`. | + * +-------------------------+------------------------------------+---------------------------------------------------------------------------------------------+ + * | *CNT* | 32 | Number of bits supported in non-transaction counters, which will overflow at 2^ `CNT`. | + * +-------------------------+------------------------------------+---------------------------------------------------------------------------------------------+ + * + * .. tabularcolumns:: |l|l|J| + * .. _hwpe_stream_addressgen_v4_ctrl: + * .. table:: **hwpe_stream_addressgen_v4** input control signals. 
+ * + * +----------------------------------+----------------------+-------------------------------------------------------------------------------------------------------------+ + * | **Name** | **Type** | **Description** | + * +----------------------------------+----------------------+-------------------------------------------------------------------------------------------------------------+ + * | *base_addr* | `logic[31:0]` | Byte-aligned base address of the stream in the HWPE-accessible memory. | + * +----------------------------------+----------------------+-------------------------------------------------------------------------------------------------------------+ + * | *tot_len* | `logic[31:0]` | Total number of transactions in stream; only the `TRANS_CNT` LSB are actually used. | + * +----------------------------------+----------------------+-------------------------------------------------------------------------------------------------------------+ + * | *d0_len* | `logic[31:0]` | d0 length in number of transactions | + * +----------------------------------+----------------------+-------------------------------------------------------------------------------------------------------------+ + * | *d0_stride* | `logic[31:0]` | d0 stride in bytes | + * +----------------------------------+----------------------+-------------------------------------------------------------------------------------------------------------+ + * | *d0_len* | `logic[31:0]` | d0 length in number of transactions | + * +----------------------------------+----------------------+-------------------------------------------------------------------------------------------------------------+ + * | *d1_stride* | `logic[31:0]` | d1 stride in bytes | + * +----------------------------------+----------------------+-------------------------------------------------------------------------------------------------------------+ + * | *d1_len* | `logic[31:0]` | d1 length in number of transactions | + 
* +----------------------------------+----------------------+-------------------------------------------------------------------------------------------------------------+ + * | *d2_stride* | `logic[31:0]` | d2 stride in bytes | + * +----------------------------------+----------------------+-------------------------------------------------------------------------------------------------------------+ + * | *d2_len* | `logic[31:0]` | d2 length in number of transactions | + * +----------------------------------+----------------------+-------------------------------------------------------------------------------------------------------------+ + * | *d3_stride* | `logic[31:0]` | d3 stride in bytes | + * * +----------------------------------+----------------------+-------------------------------------------------------------------------------------------------------------+ + * | *dim_enable_1h* | `logic[2:0]` | One-hot switch to enable 4-d counting (111), 3-d (011), 2-d (001), or 1-d (000). | + * +----------------------------------+----------------------+-------------------------------------------------------------------------------------------------------------+ + * + * .. tabularcolumns:: |l|l|J| + * .. _hwpe_stream_addressgen_v3_flags: + * .. table:: **hwpe_stream_addressgen_v3** output flags. + * + * +-----------------+------------------+-----------------------------------------------+ + * | **Name** | **Type** | **Description** | + * +-----------------+------------------+-----------------------------------------------+ + * | *done* | `logic` | 1 when the address generation has finished. 
| + * +-----------------+------------------+-----------------------------------------------+ + * + */ + + +import hwpe_stream_package::*; + +module hwpe_stream_addressgen_v4 +#( + parameter int unsigned TRANS_CNT = 32, // number of bits used within the overall transaction counter + parameter int unsigned CNT = 32, // number of bits used within the internal counter + parameter bit [3:0] DIM_ENABLE_1H = 4'b1111 // Number of dimensions enabled on HW side +) +( + // global signals + input logic clk_i, + input logic rst_ni, + // local enable and clear + input logic enable_i, + input logic clear_i, + input logic presample_i, + // generated output address + hwpe_stream_intf_stream.source addr_o, + // control channel + input ctrl_addressgen_v4_t ctrl_i, + output flags_addressgen_v4_t flags_o +); + + logic signed [31:0] d0_stride; + logic signed [31:0] d1_stride; + logic signed [31:0] d2_stride; + logic signed [31:0] d3_stride; + logic signed [31:0] d4_stride; + + + + logic [31:0] gen_addr_int; + logic done; + + logic [TRANS_CNT-1:0] overall_counter_d; + logic [CNT-1:0] d0_counter_d; + logic [CNT-1:0] d1_counter_d; + logic [CNT-1:0] d2_counter_d; + logic [CNT-1:0] d3_counter_d; + logic [CNT-1:0] d4_counter_d; + logic [31:0] d0_addr_d; + logic [31:0] d1_addr_d; + logic [31:0] d2_addr_d; + logic [31:0] d3_addr_d; + logic [31:0] d4_addr_d; + logic [TRANS_CNT-1:0] overall_counter_q; + logic [CNT-1:0] d0_counter_q; + logic [CNT-1:0] d1_counter_q; + logic [CNT-1:0] d2_counter_q; + logic [CNT-1:0] d3_counter_q; + logic [CNT-1:0] d4_counter_q; // NOTE(review): incremented but never compared against a length in this module + logic [31:0] d0_addr_q; + logic [31:0] d1_addr_q; + logic [31:0] d2_addr_q; + logic [31:0] d3_addr_q; + logic [31:0] d4_addr_q; + + logic addr_valid_d, addr_valid_q; + + assign d0_stride = $signed(ctrl_i.d0_stride); + assign d1_stride = $signed(ctrl_i.d1_stride); + assign d2_stride = $signed(ctrl_i.d2_stride); + assign d3_stride = $signed(ctrl_i.d3_stride); + assign d4_stride = $signed(ctrl_i.d4_stride); + + // address generation: next-state logic for the counters and the per-dimension address offsets + always_comb + begin : address_gen_counters_comb + d0_addr_d = 
d0_addr_q; + d1_addr_d = d1_addr_q; + d2_addr_d = d2_addr_q; + d3_addr_d = d3_addr_q; + d4_addr_d = d4_addr_q; + d0_counter_d = d0_counter_q; + d1_counter_d = d1_counter_q; + d2_counter_d = d2_counter_q; + d3_counter_d = d3_counter_q; + d4_counter_d = d4_counter_q; + overall_counter_d = overall_counter_q; + addr_valid_d = addr_valid_q; + done = '0; + if(addr_o.ready) begin // next state only changes while the address stream is ready + if(overall_counter_q < ctrl_i.tot_len) begin + addr_valid_d = 1'b1; + if((d0_counter_q < ctrl_i.d0_len) || (ctrl_i.dim_enable_1h[0] == 1'b0) || (DIM_ENABLE_1H[0] == 1'b0)) begin + d0_addr_d = d0_addr_q + d0_stride; + d0_counter_d = d0_counter_q + 1; + end + else if ((d1_counter_q < ctrl_i.d1_len) || (ctrl_i.dim_enable_1h[1] == 1'b0) || (DIM_ENABLE_1H[1] == 1'b0)) begin + d0_addr_d = '0; + d1_addr_d = d1_addr_q + d1_stride; + d0_counter_d = 1; + d1_counter_d = d1_counter_q + 1; + end + else if ((d2_counter_q < ctrl_i.d2_len) || (ctrl_i.dim_enable_1h[2] == 1'b0) || (DIM_ENABLE_1H[2] == 1'b0)) begin + d0_addr_d = '0; + d1_addr_d = '0; + d2_addr_d = d2_addr_q + d2_stride; + d0_counter_d = 1; + d1_counter_d = 1; + d2_counter_d = d2_counter_q + 1; + end + else if ((d3_counter_q < ctrl_i.d3_len) || (ctrl_i.dim_enable_1h[3] == 1'b0) || (DIM_ENABLE_1H[3] == 1'b0)) begin + d0_addr_d = '0; + d1_addr_d = '0; + d2_addr_d = '0; + d3_addr_d = d3_addr_q + d3_stride; + d0_counter_d = 1; + d1_counter_d = 1; + d2_counter_d = 1; + d3_counter_d = d3_counter_q + 1; + end + else begin + d0_addr_d = '0; + d1_addr_d = '0; + d2_addr_d = '0; + d3_addr_d = '0; + d4_addr_d = d4_addr_q + d4_stride; + d0_counter_d = 1; + d1_counter_d = 1; + d2_counter_d = 1; + d3_counter_d = 1; + d4_counter_d = d4_counter_q + 1; + end + overall_counter_d = overall_counter_q + 1; + end + else begin + addr_valid_d = 1'b0; // overall counter has reached tot_len: stop issuing addresses + done = 1'b1; + end + end + end + + // d0 address offset register: additionally zeroed by presample_i + always_ff @(posedge clk_i or negedge rst_ni) + begin : address_gen_counters_d0_ff + if (~rst_ni) begin + d0_addr_q <= '0; + end + else if (clear_i) begin + 
d0_addr_q <= '0; + end + else if (presample_i) begin + d0_addr_q <= '0; + end + else if (enable_i) begin + d0_addr_q <= d0_addr_d; + end + end + + always_ff @(posedge clk_i or negedge rst_ni) + begin : address_gen_counters_ff + if (~rst_ni) begin + d1_addr_q <= '0; + d2_addr_q <= '0; + d3_addr_q <= '0; + d4_addr_q <= '0; + d0_counter_q <= '0; // d0 counter resets to 0, the outer counters reset to 1 + d1_counter_q <= 1; + d2_counter_q <= 1; + d3_counter_q <= 1; + d4_counter_q <= 1; + overall_counter_q <= '0; + addr_valid_q <= '0; + end + else if (clear_i) begin + d1_addr_q <= '0; + d2_addr_q <= '0; + d3_addr_q <= '0; + d4_addr_q <= '0; + d0_counter_q <= '0; + d1_counter_q <= 1; + d2_counter_q <= 1; + d3_counter_q <= 1; + d4_counter_q <= 1; + overall_counter_q <= '0; + addr_valid_q <= '0; + end + else if(enable_i) begin + d1_addr_q <= d1_addr_d; + d2_addr_q <= d2_addr_d; + d3_addr_q <= d3_addr_d; + d4_addr_q <= d4_addr_d; + d0_counter_q <= d0_counter_d; + d1_counter_q <= d1_counter_d; + d2_counter_q <= d2_counter_d; + d3_counter_q <= d3_counter_d; + d4_counter_q <= d4_counter_d; + overall_counter_q <= overall_counter_d; + addr_valid_q <= addr_valid_d; + end + end + + assign gen_addr_int = ctrl_i.base_addr + d4_addr_q + d3_addr_q + d2_addr_q + d1_addr_q + d0_addr_q; // base address plus all five per-dimension offsets + + assign addr_o.data = gen_addr_int; + assign addr_o.strb = '1; + assign addr_o.valid = addr_valid_q; + + assign flags_o.done = done; + +endmodule // hwpe_stream_addressgen_v4