diff --git a/Bender.yml b/Bender.yml
index 9c57081..8eb97c0 100644
--- a/Bender.yml
+++ b/Bender.yml
@@ -42,6 +42,7 @@ sources:
   - rtl/core/hci_core_r_valid_filter.sv
   - rtl/core/hci_core_r_id_filter.sv
   - rtl/core/hci_core_source.sv
+  - rtl/core/hci_core_source_v2.sv
   - rtl/core/hci_core_split.sv
   - rtl/ecc/hci_ecc_dec.sv
   - rtl/ecc/hci_ecc_enc.sv
@@ -57,6 +58,7 @@ sources:
   # Level 3
   - rtl/core/hci_core_sink.sv
   - rtl/ecc/hci_ecc_source.sv
+  - rtl/core/hci_core_sink_v2.sv
   - rtl/interco/hci_router.sv
   # Level 4
   - rtl/ecc/hci_ecc_interconnect.sv
diff --git a/rtl/common/hci_package.sv b/rtl/common/hci_package.sv
index 1e3d1e1..a8e3278 100644
--- a/rtl/common/hci_package.sv
+++ b/rtl/common/hci_package.sv
@@ -65,8 +65,19 @@ package hci_package;
     hwpe_stream_package::flags_addressgen_v3_t addressgen_flags;
   } hci_streamer_flags_t;
 
+  typedef struct packed { // control input of the v2 streamers (hci_core_sink_v2 / hci_core_source_v2)
+    logic valid; // job request: both v2 streamers drive the push valid of their job queue from this bit
+    hwpe_stream_package::ctrl_addressgen_v3_t addressgen_ctrl; // address generator configuration, used as the job-queue payload
+  } hci_streamer_v2_ctrl_t;
+
+  typedef struct packed { // flags output of the v2 streamers
+    logic ready; // driven from the push ready of the job queue in both v2 streamers
+    logic done; // set by the v2 streamer FSMs in STREAMER_DONE once their transfer counter matches tot_len
+    hwpe_stream_package::flags_addressgen_v3_t addressgen_flags; // flags_o of the address generator instance
+  } hci_streamer_v2_flags_t;
+
   typedef enum {
-    STREAMER_IDLE, STREAMER_WORKING, STREAMER_DONE
+    STREAMER_IDLE, STREAMER_PRESAMPLE ,STREAMER_WORKING, STREAMER_DONE // NOTE(review): PRESAMPLE is inserted before WORKING, so the implicit encodings of WORKING/DONE shift by one - confirm nothing relies on the numeric values
   } hci_streamer_state_t;
 
   typedef enum {
diff --git a/rtl/core/hci_core_sink_v2.sv b/rtl/core/hci_core_sink_v2.sv
new file mode 100644
index 0000000..838d56c
--- /dev/null
+++ b/rtl/core/hci_core_sink_v2.sv
@@ -0,0 +1,418 @@
+/*
+ * hci_core_sink_v2.sv
+ * Francesco Conti
+ * Diego Gorfini
+ *
+ * Copyright (C) 2014-2025 ETH Zurich, University of Bologna
+ * Copyright and related rights are licensed under the Solderpad Hardware
+ * License, Version 0.51 (the "License"); you may not use this file except in
+ * compliance with the License. You may obtain a copy of the License at
+ * http://solderpad.org/licenses/SHL-0.51.
Unless required by applicable law
+ * or agreed to in writing, software, hardware and materials distributed under
+ * this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
+ * CONDITIONS OF ANY KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations under the License.
+ */
+
+/**
+ * The **hci_core_sink_v2** module is the high-level sink streamer
+ * performing a series of stores on a HCI-Core interface
+ * from an incoming HWPE-Stream data stream from a HWPE engine/datapath.
+ * The sink streamer is a composite module that makes use of many other
+ * fundamental IPs.
+ *
+ * Fundamentally, a sink streamer acts as a specialized DMA engine acting
+ * out a predefined pattern from an **hwpe_stream_addressgen_v3** to perform
+ * a burst of stores via a HCI-Core interface, consuming a HWPE-Stream data
+ * stream into the HCI-Core `data` field.
+ * The sink streamer is insensitive to memory latency.
+ * This is due to the nature of store streams, which are unidirectional
+ * (i.e. `addr` and `data` move in the same direction).
+ *
+ * Misaligned accesses are supported by widening the HCI-Core data width by 32
+ * bits compared to the HWPE-Stream that gets consumed by the streamer.
+ * The stream is shifted according to the address alignment and invalid bytes
+ * are disabled by unsetting their `strb`. This feature can be deactivated by
+ * unsetting the `MISALIGNED_ACCESSES` parameter; in this case, the sink will
+ * only work correctly if all data is aligned to a word boundary.
+ *
+ * Compared to the **hci_core_sink** module, the **hci_core_sink_v2** introduces
+ * a job queue to enqueue streamer jobs earlier and streamline/optimize ctrl.
+ *
+ * .. tabularcolumns:: |l|l|J|
+ * .. _hci_core_sink_v2_params:
+ * .. table:: **hci_core_sink_v2** design-time parameters.
+ *
+ * +------------------------+-------------+------------------------------------------------------------------------------------------------------------------------+
+ * | **Name**               | **Default** | **Description**                                                                                                        |
+ * +------------------------+-------------+------------------------------------------------------------------------------------------------------------------------+
+ * | *TCDM_FIFO_DEPTH*      | 0           | If >0, the module produces a HWPE-MemDecoupled interface and includes a TCDM FIFO of this depth.                       |
+ * +------------------------+-------------+------------------------------------------------------------------------------------------------------------------------+
+ * | *TRANS_CNT*            | 16          | Number of bits supported in the transaction counter of the address generator, which will overflow at 2^ `TRANS_CNT`.   |
+ * +------------------------+-------------+------------------------------------------------------------------------------------------------------------------------+
+ * | *MISALIGNED_ACCESSES*  | 1           | If set to 0, the sink will not support non-word-aligned HCI-Core accesses.                                             |
+ * +------------------------+-------------+------------------------------------------------------------------------------------------------------------------------+
+ * | *JOB_FIFO_DEPTH*       | 2           | Depth of the streamer job queue.                                                                                       |
+ * +------------------------+-------------+------------------------------------------------------------------------------------------------------------------------+
+ * | *JOB_FIFO_PASSTHROUGH* | 1           | If set to 1 (default), the streamer job queue is passthrough, otherwise it's a regular path-cutting FIFO.              |
+ * +------------------------+-------------+------------------------------------------------------------------------------------------------------------------------+
+ *
+ * .. tabularcolumns:: |l|l|J|
+ * .. _hci_core_sink_v2_ctrl:
+ * .. table:: **hci_core_sink_v2** input control signals.
+ *
+ * +-------------------+------------------------+-----------------------------------------------------------------------------+
+ * | **Name**          | **Type**               | **Description**                                                             |
+ * +-------------------+------------------------+-----------------------------------------------------------------------------+
+ * | *valid*           | `logic`                | When 1, *addressgen_ctrl* is pushed to the job queue if *ready* is 1.       |
+ * +-------------------+------------------------+-----------------------------------------------------------------------------+
+ * | *addressgen_ctrl* | `ctrl_addressgen_v3_t` | Configuration of the address generator (see **hwpe_stream_addressgen_v3**). |
+ * +-------------------+------------------------+-----------------------------------------------------------------------------+
+ *
+ * .. tabularcolumns:: |l|l|J|
+ * .. _hci_core_sink_v2_flags:
+ * .. table:: **hci_core_sink_v2** output flags.
+ *
+ * +--------------------+------------------------+-----------------------------------------------------------------------------------------------+
+ * | **Name**           | **Type**               | **Description**                                                                               |
+ * +--------------------+------------------------+-----------------------------------------------------------------------------------------------+
+ * | *ready*            | `logic`                | 1 when the job queue accepts a new job (push-side ready of the job FIFO).                     |
+ * +--------------------+------------------------+-----------------------------------------------------------------------------------------------+
+ * | *done*             | `logic`                | 1 in the last cycle of the DONE state, before the streamer moves to IDLE or PRESAMPLE.        |
+ * +--------------------+------------------------+-----------------------------------------------------------------------------------------------+
+ * | *addressgen_flags* | `flags_addressgen_v3_t`| Address generator flags (see **hwpe_stream_addressgen_v3**).
|
+ * +--------------------+------------------------+-----------------------------------------------------------------------------------------------+
+ *
+ */
+`include "hci_helpers.svh"
+
+module hci_core_sink_v2
+  import hwpe_stream_package::*;
+  import hci_package::*;
+#(
+  // Stream interface params
+  parameter int unsigned TCDM_FIFO_DEPTH = 0, // 0 selects hci_core_assign instead of hci_core_fifo on the tcdm side
+  parameter int unsigned TRANS_CNT = 16, // width of the address_cnt_q handshake counter
+  parameter int unsigned MISALIGNED_ACCESSES = 1, // 1: data/strb are shifted by addr_pop.data[1:0] bytes
+  parameter int unsigned JOB_FIFO_DEPTH = 2, // FIFO_DEPTH of the job queue
+  parameter int unsigned JOB_FIFO_PASSTHROUGH = 1, // 1: hwpe_stream_fifo_passthrough, otherwise hwpe_stream_fifo
+  parameter hci_size_parameter_t `HCI_SIZE_PARAM(tcdm) = '0
+)
+(
+  input logic clk_i,
+  input logic rst_ni,
+  input logic test_mode_i, // not referenced inside this module
+  input logic clear_i, // synchronous clear of FSM state, handshake counter and FIFOs
+  input logic enable_i, // gates the FSM state and handshake counter updates
+
+  hci_core_intf.initiator tcdm,
+  hwpe_stream_intf_stream.sink stream,
+
+  // control plane
+  input hci_streamer_v2_ctrl_t ctrl_i,
+  output hci_streamer_v2_flags_t flags_o
+);
+
+  localparam int unsigned DATA_WIDTH = `HCI_SIZE_GET_DW(tcdm); // data width taken from the tcdm size parameter
+  localparam int unsigned EHW = `HCI_SIZE_GET_EHW(tcdm); // selects the ECC handshake generate branch at the end of the module
+
+  hci_streamer_state_t cs, ns; // current / next FSM state
+  flags_fifo_t addr_fifo_flags, job_fifo_flags; // FIFO flag outputs; not read by any logic in this module
+
+  logic address_gen_en; // enable_i of the address generator
+  logic address_gen_clr; // clear_i of the address generator
+  logic job_pop_ready, presample; // job-queue pop ready; presample_i of the address generator
+
+  logic tcdm_inflight; // assigned from tcdm.req below; not read elsewhere in this module
+
+  hwpe_stream_intf_stream #(
+    .DATA_WIDTH ( 36 )
+  ) addr_push ( // address generator output, push side of i_fifo_addr
+    .clk ( clk_i )
+  );
+
+  hwpe_stream_intf_stream #(
+    .DATA_WIDTH ( 36 )
+  ) addr_pop ( // pop side of i_fifo_addr, consumed by the store path
+    .clk ( clk_i )
+  );
+
+  hwpe_stream_intf_stream #(
+    .DATA_WIDTH ( $bits(ctrl_addressgen_v3_t) )
+  ) job_push ( // push side of the job queue, driven by ctrl_i
+    .clk ( clk_i )
+  );
+
+  hwpe_stream_intf_stream #(
+    .DATA_WIDTH ( $bits(ctrl_addressgen_v3_t) )
+  ) job_pop ( // pop side of the job queue, feeds the address generator ctrl_i
+    .clk ( clk_i )
+  );
+
+  assign job_push.data = ctrl_i.addressgen_ctrl; // the job payload is the address generator configuration
+  assign job_push.valid = ctrl_i.valid;
+  assign flags_o.ready = job_push.ready; // the ready flag is the push-side ready of the job queue
+  assign job_push.strb = '1;
+
+  if (JOB_FIFO_PASSTHROUGH == 1) begin : job_fifo_passthrough_gen
+    hwpe_stream_fifo_passthrough #(
+      .DATA_WIDTH ( $bits(ctrl_addressgen_v3_t) ),
+      .FIFO_DEPTH ( JOB_FIFO_DEPTH )
+    ) i_fifo_job (
+      .clk_i ( clk_i ),
+      .rst_ni ( rst_ni ),
+      .clear_i ( clear_i ),
+      .flags_o ( job_fifo_flags ),
+      .push_i ( job_push ),
+      .pop_o ( job_pop )
+    );
+  end
+  else begin : job_fifo_nopassthrough_gen
+    hwpe_stream_fifo #(
+      .DATA_WIDTH ( $bits(ctrl_addressgen_v3_t) ),
+      .FIFO_DEPTH ( JOB_FIFO_DEPTH )
+    ) i_fifo_job (
+      .clk_i ( clk_i ),
+      .rst_ni ( rst_ni ),
+      .clear_i ( clear_i ),
+      .flags_o ( job_fifo_flags ),
+      .push_i ( job_push ),
+      .pop_o ( job_pop )
+    );
+  end
+
+  assign job_pop.ready = job_pop_ready; // pop is controlled by the FSM (fsm_comb)
+
+  localparam hci_size_parameter_t `HCI_SIZE_PARAM(tcdm_target) = '{ // internal interface: DW follows tcdm, all other sizes use the package defaults
+    DW: DATA_WIDTH,
+    AW: DEFAULT_AW,
+    BW: DEFAULT_BW,
+    UW: DEFAULT_UW,
+    IW: DEFAULT_IW,
+    EW: DEFAULT_EW,
+    EHW: DEFAULT_EHW
+  };
+  `HCI_INTF(tcdm_target, clk_i);
+
+  hwpe_stream_addressgen_v3 i_addressgen (
+    .clk_i ( clk_i ),
+    .rst_ni ( rst_ni ),
+    .enable_i ( address_gen_en ),
+    .clear_i ( address_gen_clr ),
+    .presample_i ( presample ),
+    .addr_o ( addr_push ),
+    .ctrl_i ( job_pop.data ), // configuration comes from the job at the queue output
+    .flags_o ( flags_o.addressgen_flags )
+  );
+
+  hwpe_stream_fifo #(
+    .DATA_WIDTH ( 36 ),
+    .FIFO_DEPTH ( 2 )
+  ) i_fifo_addr (
+    .clk_i ( clk_i ),
+    .rst_ni ( rst_ni ),
+    .clear_i ( clear_i ),
+    .flags_o ( addr_fifo_flags ),
+    .push_i ( addr_push ),
+    .pop_o ( addr_pop )
+  );
+
+  logic address_cnt_en, address_cnt_clr; // count enable (addr_pop handshake) and FSM-driven clear
+  logic [TRANS_CNT-1:0] address_cnt_d, address_cnt_q; // counter of consumed addresses
+
+  logic [DATA_WIDTH-1:0] stream_data_misaligned; // stream.data as received
+  logic [DATA_WIDTH/8-1:0] stream_strb_misaligned; // stream.strb as received
+  logic [DATA_WIDTH-1:0] stream_data_aligned; // data after the byte shift, drives tcdm_target.data
+  logic [DATA_WIDTH/8-1:0] stream_strb_aligned; // strobes after the byte shift, drive tcdm_target.be
+
+  assign stream_data_misaligned = stream.data;
+  assign stream_strb_misaligned = stream.strb;
+
+  if (MISALIGNED_ACCESSES==1 ) begin : misaligned_access_gen
+    always_comb
+    begin
+      stream_data_aligned = '0; // bytes not covered by the shifted stream stay at 0 ...
+      stream_strb_aligned = '0; // ... and keep their strobe cleared
+      case(addr_pop.data[1:0]) // byte offset of the current address within a 32-bit word
+        2'b00: begin // offset 0: no shift, the stream occupies the low DATA_WIDTH-32 bits
+          stream_data_aligned[DATA_WIDTH-32-1:0] = stream_data_misaligned[DATA_WIDTH-32-1:0];
+          stream_strb_aligned[(DATA_WIDTH-32)/8-1:0] = stream_strb_misaligned[(DATA_WIDTH-32)/8-1:0];
+        end
+        2'b01: begin // offset 1: shift up by one byte
+          stream_data_aligned[DATA_WIDTH-24-1:8] = stream_data_misaligned[DATA_WIDTH-32-1:0];
+          stream_strb_aligned[(DATA_WIDTH-24)/8-1:1] = stream_strb_misaligned[(DATA_WIDTH-32)/8-1:0];
+        end
+        2'b10: begin // offset 2: shift up by two bytes
+          stream_data_aligned[DATA_WIDTH-16-1:16] = stream_data_misaligned[DATA_WIDTH-32-1:0];
+          stream_strb_aligned[(DATA_WIDTH-16)/8-1:2] = stream_strb_misaligned[(DATA_WIDTH-32)/8-1:0];
+        end
+        2'b11: begin // offset 3: shift up by three bytes
+          stream_data_aligned[DATA_WIDTH-8-1:24] = stream_data_misaligned[DATA_WIDTH-32-1:0];
+          stream_strb_aligned[(DATA_WIDTH-8)/8-1:3] = stream_strb_misaligned[(DATA_WIDTH-32)/8-1:0];
+        end
+      endcase
+    end
+  end
+  else begin // aligned-only configuration: data and strobes pass through unshifted
+    assign stream_data_aligned[DATA_WIDTH-1:0] = stream_data_misaligned[DATA_WIDTH-1:0];
+    assign stream_strb_aligned[DATA_WIDTH/8-1:0] = stream_strb_misaligned[DATA_WIDTH/8-1:0];
+  end
+
+  // hci port binding (all request fields are forced to 0 while the FSM is in STREAMER_IDLE)
+  assign tcdm_target.req = (cs != STREAMER_IDLE) ? stream.valid & addr_pop.valid : '0; // request needs both data and an address
+  assign tcdm_target.add = (cs != STREAMER_IDLE) ? {addr_pop.data[31:2],2'b0} : '0; // word-aligned address; the two LSBs only steer the shift above
+  assign tcdm_target.wen = '0; // constant: this module only issues stores
+  assign tcdm_target.be = (cs != STREAMER_IDLE) ? stream_strb_aligned : '0;
+  assign tcdm_target.data = (cs != STREAMER_IDLE) ? stream_data_aligned : '0;
+  assign tcdm_target.r_ready = '1;
+  assign stream.ready = ~stream.valid | (tcdm_target.gnt & addr_pop.valid); // ready when nothing is offered, or when the store is granted and an address is available
+  assign addr_pop.ready = stream.valid & stream.ready; // one address is consumed per accepted stream beat
+
+  // unimplemented user bits = 0
+  assign tcdm_target.user = '0;
+
+  // unimplemented id bits = 0
+  assign tcdm_target.id = '0;
+
+  // FIXME unimplemented ECC bits
+  assign tcdm_target.ecc = '0;
+
+  generate
+
+    if(TCDM_FIFO_DEPTH != 0) begin: tcdm_fifos_gen
+
+      hci_core_fifo #(
+        .FIFO_DEPTH ( TCDM_FIFO_DEPTH ),
+        .`HCI_SIZE_PARAM(tcdm_initiator) ( `HCI_SIZE_PARAM(tcdm) )
+      ) i_tcdm_fifo (
+        .clk_i ( clk_i ),
+        .rst_ni ( rst_ni ),
+        .clear_i ( clear_i ),
+        .tcdm_target ( tcdm_target ),
+        .tcdm_initiator ( tcdm ),
+        .flags_o ( )
+      );
+
+    end
+    else begin: no_tcdm_fifos_gen
+
+      hci_core_assign i_tcdm_assign (
+        .tcdm_target ( tcdm_target ),
+        .tcdm_initiator ( tcdm )
+      );
+
+    end
+
+  endgenerate
+
+  assign tcdm_inflight = tcdm.req;
+
+  always_ff @(posedge clk_i, negedge rst_ni)
+  begin : fsm_seq
+    if(rst_ni == 1'b0) begin
+      cs <= STREAMER_IDLE;
+    end
+    else if(clear_i == 1'b1) begin
+      cs <= STREAMER_IDLE;
+    end
+    else if(enable_i) begin
+      cs <= ns;
+    end
+  end
+
+  always_comb
+  begin : fsm_comb
+    ns = cs; // defaults: hold state, keep all strobes low
+    job_pop_ready = 1'b0;
+    flags_o.done = 1'b0;
+    address_gen_en = 1'b0;
+    presample = 1'b0;
+    address_gen_clr = clear_i;
+    address_cnt_clr = 1'b0;
+    case(cs)
+      STREAMER_IDLE : begin
+        if(job_pop.valid) begin // a job is available at the queue output
+          ns = STREAMER_PRESAMPLE;
+          address_gen_en = 1'b1;
+          presample = 1'b1; // presample_i is only asserted on this IDLE -> PRESAMPLE transition
+        end
+      end
+      STREAMER_PRESAMPLE : begin
+        ns = STREAMER_WORKING; // moves on to WORKING unless the address generator already reports done
+        address_gen_en = 1'b1;
+        if(flags_o.addressgen_flags.done) begin
+          ns = STREAMER_DONE;
+          job_pop_ready = 1'b1; // pop the job once address generation is done
+        end
+      end
+      STREAMER_WORKING : begin
+        address_gen_en = 1'b1;
+        if(flags_o.addressgen_flags.done) begin
+          ns = STREAMER_DONE;
+          job_pop_ready = 1'b1; // pop the job once address generation is done
+        end
+      end
+      STREAMER_DONE : begin
+        address_gen_en = 1'b1;
+        if(address_cnt_q==ctrl_i.addressgen_ctrl.tot_len) begin // NOTE(review): compares against the live ctrl_i input, not the configuration of the job that was popped - confirm ctrl_i is guaranteed to still hold this job's tot_len
+          flags_o.done = 1'b1;
+          address_gen_en = 1'b0;
+          address_gen_clr = 1'b1;
+          address_cnt_clr = 1'b1;
+          if(job_pop.valid) begin // another job is queued: skip IDLE
+            ns = STREAMER_PRESAMPLE; // NOTE(review): presample stays 0 on this path, unlike the IDLE -> PRESAMPLE transition - confirm the address generator still samples the next job
+          end
+          else begin
+            ns = STREAMER_IDLE;
+          end
+        end
+      end
+    endcase
+  end
+
+  assign address_cnt_en = addr_pop.valid & addr_pop.ready; // one count per address handshake
+
+  always_ff @(posedge clk_i or negedge rst_ni)
+  begin
+    if(~rst_ni)
+      address_cnt_q <= '0;
+    else if(clear_i | address_cnt_clr)
+      address_cnt_q <= '0;
+    else if(enable_i & address_cnt_en)
+      address_cnt_q <= address_cnt_d;
+  end
+  assign address_cnt_d = address_cnt_q + 1;
+
+/*
+ * ECC Handshake signals
+ */
+  if(EHW > 0) begin : ecc_handshake_gen
+    assign tcdm_target.ereq = '{default:{tcdm_target.req}}; // replicate req on every ereq bit
+    assign tcdm_target.r_eready = '{default:{tcdm_target.r_ready}}; // replicate r_ready on every r_eready bit
+  end
+  else begin : no_ecc_handshake_gen
+    assign tcdm_target.ereq = '0;
+    assign tcdm_target.r_eready = '1; // no ECC handshake: r_eready is tied to all ones
+  end
+
+/*
+ * Interface size asserts
+ */
+`ifndef SYNTHESIS
+`ifndef VERILATOR
+`ifndef VCS
+  if(MISALIGNED_ACCESSES == 0) begin
+    initial
+      dw : assert(stream.DATA_WIDTH == tcdm.DW); // aligned-only: stream and tcdm widths must match
+  end
+  else begin
+    initial
+      dw : assert(stream.DATA_WIDTH+32 == tcdm.DW); // misaligned support: tcdm must be exactly 32 bits wider than the stream
+  end
+
+  `HCI_SIZE_CHECK_ASSERTS(tcdm);
+`endif
+`endif
+`endif
+
+endmodule // hci_core_sink_v2
diff --git a/rtl/core/hci_core_source_v2.sv b/rtl/core/hci_core_source_v2.sv
new file mode 100644
index 0000000..5a310c9
--- /dev/null
+++ b/rtl/core/hci_core_source_v2.sv
@@ -0,0 +1,447 @@
+/*
+ * hci_core_source_v2.sv
+ * Francesco Conti
+ * Arpan Suravi Prasad
+ * Diego Gorfini
+ *
+ * Copyright (C) 2014-2025 ETH Zurich, University of Bologna
+ * Copyright and related rights are licensed under the Solderpad Hardware
+ * License, Version 0.51 (the "License"); you may not use this file except in
+ * compliance with the License. You may obtain a copy of the License at
+ * http://solderpad.org/licenses/SHL-0.51.
Unless required by applicable law
+ * or agreed to in writing, software, hardware and materials distributed under
+ * this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
+ * CONDITIONS OF ANY KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations under the License.
+ */
+
+/**
+ * The **hci_core_source_v2** module is the high-level source streamer
+ * performing a series of loads on a HCI-Core interface
+ * and producing a HWPE-Stream data stream to feed a HWPE engine/datapath.
+ * The source streamer is a composite module that makes use of many other
+ * fundamental IPs.
+ *
+ * Fundamentally, a source streamer acts as a specialized DMA engine acting
+ * out a predefined pattern from an **hwpe_stream_addressgen_v3** to perform
+ * a burst of loads via a HCI-Core interface, producing a HWPE-Stream
+ * data stream from the HCI-Core `r_data` field.
+ * By default, the HCI-Core streamer supports delayed accesses using a HCI-Core
+ * interface.
+ *
+ * Misaligned accesses are supported by widening the HCI-Core data width by 32
+ * bits compared to the HWPE-Stream that gets produced by the streamer.
+ * Unused bytes are simply ignored. This feature can be deactivated by unsetting
+ * the `MISALIGNED_ACCESSES` parameter; in this case, the source will
+ * only work correctly if all data is aligned to a word boundary.
+ *
+ * In principle, the source streamer is insensitive to latency.
+ * However, when configured to support misaligned memory accesses, the address FIFO
+ * depth sets the maximum supported latency.
+ * This depth is set by the `ADDR_MIS_DEPTH` parameter (default 8).
+ *
+ * Compared to the **hci_core_source** module, the **hci_core_source_v2** introduces
+ * a job queue to enqueue streamer jobs earlier and streamline/optimize ctrl.
+ *
+ * .. tabularcolumns:: |l|l|J|
+ * .. _hci_core_source_v2_params:
+ * .. table:: **hci_core_source_v2** design-time parameters.
+ *
+ * +------------------------+-------------+--------------------------------------------------------------------------------------------------------------------------+
+ * | **Name**               | **Default** | **Description**                                                                                                          |
+ * +------------------------+-------------+--------------------------------------------------------------------------------------------------------------------------+
+ * | *LATCH_FIFO*           | 0           | If 1, use latches instead of flip-flops (requires special constraints in synthesis).                                     |
+ * +------------------------+-------------+--------------------------------------------------------------------------------------------------------------------------+
+ * | *TRANS_CNT*            | 16          | Number of bits supported in the transaction counter of the address generator, which will overflow at 2^ `TRANS_CNT`.     |
+ * +------------------------+-------------+--------------------------------------------------------------------------------------------------------------------------+
+ * | *ADDR_MIS_DEPTH*       | 8           | Depth of the misaligned address FIFO. This **must** be at least the max-latency between the HCI-Core `gnt` and `r_valid`.|
+ * +------------------------+-------------+--------------------------------------------------------------------------------------------------------------------------+
+ * | *MISALIGNED_ACCESSES*  | 1           | If set to 0, the source will not support non-word-aligned HCI-Core accesses.                                             |
+ * +------------------------+-------------+--------------------------------------------------------------------------------------------------------------------------+
+ * | *PASSTHROUGH_FIFO*     | 0           | If set to 1, the address FIFO will be capable of fall-through operation (i.e., skipping the FIFO latency entirely).      |
+ * +------------------------+-------------+--------------------------------------------------------------------------------------------------------------------------+
+ * | *JOB_FIFO_DEPTH*       | 2           | Depth of the streamer job queue.
|
+ * +------------------------+-------------+--------------------------------------------------------------------------------------------------------------------------+
+ * | *JOB_FIFO_PASSTHROUGH* | 1           | If set to 1 (default), the streamer job queue is passthrough, otherwise it's a regular path-cutting FIFO.                |
+ * +------------------------+-------------+--------------------------------------------------------------------------------------------------------------------------+
+ *
+ * .. tabularcolumns:: |l|l|J|
+ * .. _hci_core_source_v2_ctrl:
+ * .. table:: **hci_core_source_v2** input control signals.
+ *
+ * +-------------------+------------------------+-----------------------------------------------------------------------------+
+ * | **Name**          | **Type**               | **Description**                                                             |
+ * +-------------------+------------------------+-----------------------------------------------------------------------------+
+ * | *valid*           | `logic`                | When 1, *addressgen_ctrl* is pushed to the job queue if *ready* is 1.       |
+ * +-------------------+------------------------+-----------------------------------------------------------------------------+
+ * | *addressgen_ctrl* | `ctrl_addressgen_v3_t` | Configuration of the address generator (see **hwpe_stream_addressgen_v3**). |
+ * +-------------------+------------------------+-----------------------------------------------------------------------------+
+ *
+ * .. tabularcolumns:: |l|l|J|
+ * .. _hci_core_source_v2_flags:
+ * .. table:: **hci_core_source_v2** output flags.
+ *
+ * +--------------------+------------------------+-----------------------------------------------------------------------------------------------+
+ * | **Name**           | **Type**               | **Description**                                                                               |
+ * +--------------------+------------------------+-----------------------------------------------------------------------------------------------+
+ * | *ready*            | `logic`                | 1 when the job queue accepts a new job (push-side ready of the job FIFO).
|
+ * +--------------------+------------------------+-----------------------------------------------------------------------------------------------+
+ * | *done*             | `logic`                | 1 in the last cycle of the DONE state, before the streamer moves to IDLE or PRESAMPLE.        |
+ * +--------------------+------------------------+-----------------------------------------------------------------------------------------------+
+ * | *addressgen_flags* | `flags_addressgen_v3_t`| Address generator flags (see **hwpe_stream_addressgen_v3**).                                  |
+ * +--------------------+------------------------+-----------------------------------------------------------------------------------------------+
+ *
+ */
+
+`include "hci_helpers.svh"
+
+module hci_core_source_v2
+  import hwpe_stream_package::*;
+  import hci_package::*;
+#(
+  // Stream interface params
+  parameter int unsigned LATCH_FIFO = 0, // not referenced inside this module
+  parameter int unsigned TRANS_CNT = 16, // width of the stream_cnt_q handshake counter
+  parameter int unsigned ADDR_MIS_DEPTH = 8, // Beware: this must be >= the maximum latency between TCDM gnt and TCDM r_valid!!!
+  parameter int unsigned MISALIGNED_ACCESSES = 1, // 1: responses are shifted down by the stored address offset
+  parameter int unsigned JOB_FIFO_DEPTH = 2, // FIFO_DEPTH of the job queue
+  parameter int unsigned JOB_FIFO_PASSTHROUGH = 1, // 1: hwpe_stream_fifo_passthrough, otherwise hwpe_stream_fifo
+  parameter int unsigned PASSTHROUGH_FIFO = 0, // non-zero: the address FIFO is a hwpe_stream_fifo_passthrough
+  parameter hci_size_parameter_t `HCI_SIZE_PARAM(tcdm) = '0
+)
+(
+  input logic clk_i,
+  input logic rst_ni,
+  input logic test_mode_i, // not referenced inside this module
+  input logic clear_i, // synchronous clear of FSM state, counters, registers and FIFOs
+  input logic enable_i, // gates state updates, stream.valid and the misaligned-address push
+
+  hci_core_intf.initiator tcdm,
+  hwpe_stream_intf_stream.source stream,
+
+  // control plane
+  input hci_streamer_v2_ctrl_t ctrl_i,
+  output hci_streamer_v2_flags_t flags_o
+);
+
+  localparam int unsigned DATA_WIDTH = `HCI_SIZE_GET_DW(tcdm); // data width taken from the tcdm size parameter
+  localparam int unsigned EHW = `HCI_SIZE_GET_EHW(tcdm); // selects the ECC handshake generate branch at the end of the module
+
+  hci_streamer_state_t cs, ns; // current / next FSM state
+  flags_fifo_t addr_fifo_flags, job_fifo_flags; // addr_fifo_flags.empty is part of the done condition; job_fifo_flags is not read here
+
+  logic job_pop_ready, presample; // job-queue pop ready; presample_i of the address generator
+  logic address_gen_en; // enable_i of the address generator
+  logic address_gen_clr; // clear_i of the address generator
+
+  hwpe_stream_intf_stream #(
+    .DATA_WIDTH ( 36 ) // 36 bits to match i_fifo_addr below and hci_core_sink_v2 (was 32)
+  ) addr_push ( // address generator output, push side of i_fifo_addr
+    .clk ( clk_i )
+  );
+
+  hwpe_stream_intf_stream #(
+    .DATA_WIDTH ( 36 ) // 36 bits to match i_fifo_addr below and hci_core_sink_v2 (was 32); only bits [31:0] are used
+  ) addr_pop ( // pop side of i_fifo_addr, consumed by the load request path
+    .clk ( clk_i )
+  );
+
+  hwpe_stream_intf_stream #(
+    .DATA_WIDTH ( $bits(ctrl_addressgen_v3_t) )
+  ) job_push ( // push side of the job queue, driven by ctrl_i
+    .clk ( clk_i )
+  );
+
+  hwpe_stream_intf_stream #(
+    .DATA_WIDTH ( $bits(ctrl_addressgen_v3_t) )
+  ) job_pop ( // pop side of the job queue, feeds the address generator ctrl_i
+    .clk ( clk_i )
+  );
+
+  assign job_push.data = ctrl_i.addressgen_ctrl; // the job payload is the address generator configuration
+  assign job_push.valid = ctrl_i.valid;
+  assign flags_o.ready = job_push.ready; // the ready flag is the push-side ready of the job queue
+  assign job_push.strb = '1;
+
+  if (JOB_FIFO_PASSTHROUGH == 1) begin : job_fifo_passthrough_gen
+    hwpe_stream_fifo_passthrough #(
+      .DATA_WIDTH ( $bits(ctrl_addressgen_v3_t) ),
+      .FIFO_DEPTH ( JOB_FIFO_DEPTH )
+    ) i_fifo_job (
+      .clk_i ( clk_i ),
+      .rst_ni ( rst_ni ),
+      .clear_i ( clear_i ),
+      .flags_o ( job_fifo_flags ),
+      .push_i ( job_push ),
+      .pop_o ( job_pop )
+    );
+  end
+  else begin : job_fifo_nopassthrough_gen
+    hwpe_stream_fifo #(
+      .DATA_WIDTH ( $bits(ctrl_addressgen_v3_t) ),
+      .FIFO_DEPTH ( JOB_FIFO_DEPTH )
+    ) i_fifo_job (
+      .clk_i ( clk_i ),
+      .rst_ni ( rst_ni ),
+      .clear_i ( clear_i ),
+      .flags_o ( job_fifo_flags ),
+      .push_i ( job_push ),
+      .pop_o ( job_pop )
+    );
+  end
+
+  assign job_pop.ready = job_pop_ready; // pop is controlled by the FSM (fsm_comb)
+
+  // generate addresses
+  hwpe_stream_addressgen_v3 i_addressgen (
+    .clk_i ( clk_i ),
+    .rst_ni ( rst_ni ),
+    .enable_i ( address_gen_en ),
+    .clear_i ( address_gen_clr ),
+    .presample_i ( presample ),
+    .addr_o ( addr_push ),
+    .ctrl_i ( job_pop.data ), // configuration comes from the job at the queue output
+    .flags_o ( flags_o.addressgen_flags )
+  );
+
+  if (PASSTHROUGH_FIFO) begin : passthrough_gen
+    hwpe_stream_fifo_passthrough #(
+      .DATA_WIDTH ( 36 ),
+      .FIFO_DEPTH ( 2 )
+    ) i_fifo_addr (
+      .clk_i ( clk_i ),
+      .rst_ni ( rst_ni ),
+      .clear_i ( clear_i ),
+      .flags_o ( addr_fifo_flags ),
+      .push_i ( addr_push ),
+      .pop_o ( addr_pop )
+    );
+  end
+  else begin : nopassthrough_gen
+    hwpe_stream_fifo #(
+      .DATA_WIDTH ( 36 ),
+      .FIFO_DEPTH ( 2 )
+    ) i_fifo_addr (
+      .clk_i ( clk_i ),
+      .rst_ni ( rst_ni ),
+      .clear_i ( clear_i ),
+      .flags_o ( addr_fifo_flags ),
+      .push_i ( addr_push ),
+      .pop_o ( addr_pop )
+    );
+  end
+
+  logic stream_valid_q; // 1 while a response is held in stream_data_q waiting for stream.ready
+  logic [DATA_WIDTH-1:0] stream_data_q; // last tcdm.r_data captured on r_valid
+  logic [1:0] addr_misaligned_q; // byte offset of the response being presented (output of i_addr_misaligned_fifo)
+  logic addr_misaligned_valid; // declared but never driven or read in this module
+  logic [DATA_WIDTH-1:0] stream_data_misaligned; // response data before the byte shift
+  logic [DATA_WIDTH-1:0] stream_data_aligned; // response data after the byte shift, drives stream.data
+
+  logic stream_cnt_en, stream_cnt_clr; // count enable (stream handshake) and FSM-driven clear
+  logic [TRANS_CNT-1:0] stream_cnt_d, stream_cnt_q; // counter of delivered stream beats
+
+  // this is simply exploiting the fact that we can make a wider data access than strictly necessary!
+  assign stream_data_misaligned = tcdm.r_valid ? tcdm.r_data : stream_data_q; // is this strictly necessary to keep the HWPE-Stream protocol? or can be avoided with a FIFO q?
+
+  if (MISALIGNED_ACCESSES==1 ) begin : misaligned_access_gen
+    always_comb
+    begin
+      stream_data_aligned = '0;
+      case(addr_misaligned_q) // byte offset recorded when the request was granted
+        2'b00: begin // offset 0: pass the full response through
+          stream_data_aligned[DATA_WIDTH-1:0] = stream_data_misaligned[DATA_WIDTH-1:0];
+        end
+        2'b01: begin // offset 1: drop the lowest byte
+          stream_data_aligned[DATA_WIDTH-32-1:0] = stream_data_misaligned[DATA_WIDTH-24-1:8];
+        end
+        2'b10: begin // offset 2: drop the two lowest bytes
+          stream_data_aligned[DATA_WIDTH-32-1:0] = stream_data_misaligned[DATA_WIDTH-16-1:16];
+        end
+        2'b11: begin // offset 3: drop the three lowest bytes
+          stream_data_aligned[DATA_WIDTH-32-1:0] = stream_data_misaligned[DATA_WIDTH-8-1:24];
+        end
+      endcase
+    end
+  end
+  else begin // aligned-only configuration: the response passes through unshifted
+    assign stream_data_aligned[DATA_WIDTH-1:0] = stream_data_misaligned[DATA_WIDTH-1:0];
+  end
+
+  assign tcdm.r_ready = stream.ready;
+  assign tcdm.req = (cs != STREAMER_IDLE) ? addr_pop.valid & stream.ready : '0; // request only when an address is available and the stream can accept data
+  assign tcdm.add = (cs != STREAMER_IDLE) ? {addr_pop.data[31:2],2'b0} : '0; // word-aligned address; the two LSBs go through i_addr_misaligned_fifo
+  assign tcdm.wen = 1'b1; // constant: this module only issues loads
+  assign tcdm.be = 4'h0; // NOTE(review): 4-bit literal regardless of DATA_WIDTH; presumably be is don't-care for loads - confirm
+  assign tcdm.data = '0;
+  assign tcdm.user = '0;
+  assign tcdm.id = '0;
+  assign tcdm.ecc = '0;
+  assign stream.strb = '1;
+  assign stream.data = stream_data_aligned;
+  assign stream.valid = enable_i & (tcdm.r_valid | stream_valid_q); // is this strictly necessary to keep the HWPE-Stream protocol? or can be avoided with a FIFO q?
+  assign addr_pop.ready = (cs != STREAMER_IDLE) ? addr_pop.valid & stream.ready & tcdm.gnt : 1'b0; // an address is consumed when its request is granted
+
+  hwpe_stream_intf_stream #(
+    .DATA_WIDTH ( 8 ) // only 2 significant
+  ) addr_misaligned_push (
+    .clk ( clk_i )
+  );
+  hwpe_stream_intf_stream #(
+    .DATA_WIDTH ( 8 ) // only 2 significant
+  ) addr_misaligned_pop (
+    .clk ( clk_i )
+  );
+  assign addr_misaligned_push.data = {6'b0, addr_pop.data[1:0]}; // byte offset of the address being requested
+  assign addr_misaligned_push.strb = '1;
+  assign addr_misaligned_push.valid = enable_i & tcdm.req & tcdm.gnt; // BEWARE: considered always ready!!!
+  assign addr_misaligned_pop.ready = (tcdm.r_valid | stream_valid_q) & stream.ready; // pop the offset when its response is delivered to the stream
+  assign addr_misaligned_q = addr_misaligned_pop.data[1:0];
+
+  hwpe_stream_fifo #(
+    .DATA_WIDTH ( 8 ), // only [1:0] significant
+    .FIFO_DEPTH ( ADDR_MIS_DEPTH )
+  ) i_addr_misaligned_fifo (
+    .clk_i ( clk_i ),
+    .rst_ni ( rst_ni ),
+    .clear_i ( clear_i ),
+    .flags_o ( ),
+    .push_i ( addr_misaligned_push ),
+    .pop_o ( addr_misaligned_pop )
+  );
+
+  always_ff @(posedge clk_i or negedge rst_ni)
+  begin
+    if(~rst_ni)
+      stream_valid_q <= 1'b0;
+    else if(clear_i)
+      stream_valid_q <= 1'b0;
+    else if(enable_i) begin
+      if(tcdm.r_valid & stream.ready) // response delivered in the same cycle: nothing to hold
+        stream_valid_q <= 1'b0;
+      else if(tcdm.r_valid) // response arrived but the stream is not ready: hold it
+        stream_valid_q <= 1'b1;
+      else if(stream_valid_q & stream.ready) // held response has been delivered
+        stream_valid_q <= 1'b0;
+    end
+  end
+
+  always_ff @(posedge clk_i or negedge rst_ni)
+  begin
+    if(~rst_ni)
+      stream_data_q <= '0;
+    else if(clear_i)
+      stream_data_q <= '0;
+    else if(enable_i & tcdm.r_valid) // capture every response so it can be replayed while stream_valid_q is set
+      stream_data_q <= tcdm.r_data;
+  end
+
+  always_ff @(posedge clk_i, negedge rst_ni)
+  begin : fsm_seq
+    if(rst_ni == 1'b0) begin
+      cs <= STREAMER_IDLE;
+    end
+    else if(clear_i == 1'b1) begin
+      cs <= STREAMER_IDLE;
+    end
+    else if(enable_i) begin
+      cs <= ns;
+    end
+  end
+
+  always_comb
+  begin : fsm_comb
+    ns = cs; // defaults: hold state, keep all strobes low
+    job_pop_ready = 1'b0;
+    flags_o.done = 1'b0;
+    presample = 1'b0;
+    address_gen_en = 1'b0;
+    address_gen_clr = clear_i;
+    stream_cnt_clr = 1'b0;
+    case(cs)
+      STREAMER_IDLE : begin
+        if(job_pop.valid) begin // a job is available at the queue output
+          ns = STREAMER_PRESAMPLE;
+          address_gen_en = 1'b1;
+          presample = 1'b1; // presample_i is only asserted on this IDLE -> PRESAMPLE transition
+        end
+      end
+      STREAMER_PRESAMPLE : begin
+        ns = STREAMER_WORKING; // moves on to WORKING unless the address generator already reports done
+        address_gen_en = 1'b1;
+        if(flags_o.addressgen_flags.done) begin
+          ns = STREAMER_DONE;
+          job_pop_ready = 1'b1; // pop the job once address generation is done
+        end
+      end
+      STREAMER_WORKING : begin
+        address_gen_en = 1'b1;
+        if(flags_o.addressgen_flags.done) begin
+          ns = STREAMER_DONE;
+          job_pop_ready = 1'b1; // pop the job once address generation is done
+        end
+      end
+      STREAMER_DONE : begin
+        address_gen_en = 1'b1;
+        if((addr_fifo_flags.empty==1'b1) && (stream_cnt_q==job_pop.data.tot_len)) begin // NOTE(review): the job was already popped on entry to DONE, so job_pop.data may no longer be this job's configuration; it is also a plain vector in the stream interface - confirm the member select and the value are as intended
+          flags_o.done = 1'b1;
+          address_gen_en = 1'b0;
+          address_gen_clr = 1'b1;
+          stream_cnt_clr = 1'b1;
+          if(job_pop.valid) begin // another job is queued: skip IDLE
+            ns = STREAMER_PRESAMPLE; // NOTE(review): presample stays 0 on this path, unlike the IDLE -> PRESAMPLE transition - confirm the address generator still samples the next job
+          end
+          else begin
+            ns = STREAMER_IDLE;
+          end
+        end
+      end
+    endcase
+  end
+
+  assign stream_cnt_en = stream.valid & stream.ready; // one count per delivered stream beat
+
+  always_ff @(posedge clk_i or negedge rst_ni)
+  begin
+    if(~rst_ni)
+      stream_cnt_q <= '0;
+    else if(clear_i | stream_cnt_clr)
+      stream_cnt_q <= '0;
+    else if(enable_i & stream_cnt_en)
+      stream_cnt_q <= stream_cnt_d;
+  end
+  assign stream_cnt_d = stream_cnt_q + 1;
+
+/*
+ * ECC Handshake signals
+ */
+  if(EHW > 0) begin : ecc_handshake_gen
+    assign tcdm.ereq = '{default: {tcdm.req}}; // replicate req on every ereq bit
+    assign tcdm.r_eready = '{default: {tcdm.r_ready}}; // replicate r_ready on every r_eready bit
+  end
+  else begin : no_ecc_handshake_gen
+    assign tcdm.ereq = '0;
+    assign tcdm.r_eready = '1; // no ECC handshake: r_eready is tied to all ones
+  end
+
+/*
+ * Interface size asserts
+ */
+`ifndef SYNTHESIS
+`ifndef VERILATOR
+`ifndef VCS
+  if(MISALIGNED_ACCESSES == 0) begin
+    initial
+      dw : assert(stream.DATA_WIDTH == tcdm.DW); // aligned-only: stream and tcdm widths must match
+  end
+  else begin
+    initial
+      dw : assert(stream.DATA_WIDTH <= tcdm.DW); // misaligned support: tcdm must be at least as wide as the stream
+  end
+
+  `HCI_SIZE_CHECK_ASSERTS(tcdm);
+`endif
+`endif
+`endif
+
+endmodule // hci_core_source_v2