From cbbecdf6c01e0ad3efef9487b4a4af091f0c0b1e Mon Sep 17 00:00:00 2001 From: Georg Rutishauser Date: Fri, 7 Jun 2024 16:19:31 +0200 Subject: [PATCH 01/67] tb: Fix mock UART to print from all cores --- tb/mock_uart.sv | 4 +-- tb/mock_uart_axi.sv | 65 +++++++++++++++++++++++++------------------ tb/pulp_cluster_tb.sv | 10 ++++--- 3 files changed, 46 insertions(+), 33 deletions(-) diff --git a/tb/mock_uart.sv b/tb/mock_uart.sv index 3869fa28..9428b6c6 100644 --- a/tb/mock_uart.sv +++ b/tb/mock_uart.sv @@ -11,7 +11,7 @@ // Author: Florian Zaruba, ETH Zurich // Date: 28/09/2018 // Description: Mock replacement for UART in testbench (not synthesiesable!) - + module mock_uart #( parameter int UART_IDX = 0 )( @@ -60,7 +60,7 @@ module mock_uart #( function void uart_tx(byte ch); if(ch==8'h0A) begin - $display("[TB UART] %s", stringa); + $display("[TB UART %2d] %s", UART_IDX, stringa); charnum = 0; stringa = '0; end else begin diff --git a/tb/mock_uart_axi.sv b/tb/mock_uart_axi.sv index 80406a87..5c580796 100644 --- a/tb/mock_uart_axi.sv +++ b/tb/mock_uart_axi.sv @@ -16,22 +16,24 @@ module mock_uart_axi #( parameter int unsigned AxiIw = 0, parameter int unsigned AxiAw = 0, parameter int unsigned AxiDw = 0, - parameter int unsigned AxiUw = 0 + parameter int unsigned AxiUw = 0, + parameter logic [AxiAw-1:0] BaseAddr = 0, + parameter int unsigned N_CORES = 8 )( input logic clk_i, input logic rst_ni, input logic test_i, AXI_BUS.Slave uart ); - - logic uart_penable; - logic uart_pwrite; - logic [AxiAw-1:0] uart_paddr; - logic uart_psel; - logic [31:0] uart_pwdata; - logic [31:0] uart_prdata; - logic uart_pready; - logic uart_pslverr; + + logic uart_penable; + logic uart_pwrite; + logic [AxiAw-1:0] uart_paddr; + logic [N_CORES-1:0] uart_psel; + logic [31:0] uart_pwdata; + logic [N_CORES-1:0][31:0] uart_prdata ; + logic [N_CORES-1:0] uart_pready; + logic [N_CORES-1:0] uart_pslverr; AXI_LITE #( .AXI_DATA_WIDTH(AxiDw), @@ -77,12 +79,15 @@ module mock_uart_axi #( logic [AxiAw-1:0] 
end_addr; } rule_t; - rule_t [0:0] rule; - assign rule[0] = '{0, '0, '1}; - + rule_t [0:N_CORES-1] rule; + // each mock UART only has 2 words of address space + for (genvar g = 0; g < N_CORES; g++) begin: gen_rule + assign rule[g] = '{g, BaseAddr + 8*g, BaseAddr + 8*(g+1)-1}; + end // gen_rule + axi_lite_to_apb_intf #( - .NoApbSlaves (1), - .NoRules (1), + .NoApbSlaves (N_CORES), + .NoRules (N_CORES), .AddrWidth (AxiAw), .DataWidth (32), .PipelineRequest (1'b0), @@ -106,17 +111,23 @@ module mock_uart_axi #( ); /* pragma translate_off */ - mock_uart i_mock_uart0 ( - .clk_i ( clk_i ), - .rst_ni ( rst_ni ), - .penable_i ( uart_penable ), - .pwrite_i ( uart_pwrite ), - .paddr_i ( uart_paddr[31:0] ), - .psel_i ( uart_psel ), - .pwdata_i ( uart_pwdata ), - .prdata_o ( uart_prdata ), - .pready_o ( uart_pready ), - .pslverr_o ( uart_pslverr ) - ); + + for (genvar g = 0; g < N_CORES; g++) begin: gen_mock_uart + // one mock UART per core + mock_uart #( + .UART_IDX ( g ) + ) i_mock_uart ( + .clk_i ( clk_i ), + .rst_ni ( rst_ni ), + .penable_i ( uart_penable ), + .pwrite_i ( uart_pwrite ), + .paddr_i ( uart_paddr - g*8 ), // Mock UART expects to be addressed starting at 0 + .psel_i ( uart_psel[g] ), + .pwdata_i ( uart_pwdata ), + .prdata_o ( uart_prdata[g] ), + .pready_o ( uart_pready[g] ), + .pslverr_o ( uart_pslverr[g] ) + ); + end // gen_mock_uart endmodule diff --git a/tb/pulp_cluster_tb.sv b/tb/pulp_cluster_tb.sv index aee69faf..237535d7 100644 --- a/tb/pulp_cluster_tb.sv +++ b/tb/pulp_cluster_tb.sv @@ -176,10 +176,12 @@ module pulp_cluster_tb; ); mock_uart_axi #( - .AxiIw ( AxiIwMst ), - .AxiAw ( AxiAw ), - .AxiDw ( AxiDw ), - .AxiUw ( AxiUw ) + .AxiIw ( AxiIwMst ), + .AxiAw ( AxiAw ), + .AxiDw ( AxiDw ), + .AxiUw ( AxiUw ), + .N_CORES ( 8 ), + .BaseAddr( 32'h4000_0000 ) ) i_mock_uart ( .clk_i ( s_clk ), .rst_ni ( s_rstn ), From 803eafeeb5404fd93bf6c9b3f4664d73d9c50d8b Mon Sep 17 00:00:00 2001 From: Sergio Mazzola Date: Thu, 13 Feb 2025 13:35:13 +0100 Subject: [PATCH 
02/67] bender: Bump neureka and hci versions to Arpan's --- Bender.lock | 7 ++----- Bender.yml | 4 ++-- 2 files changed, 4 insertions(+), 7 deletions(-) diff --git a/Bender.lock b/Bender.lock index cdfd278e..3bfb16a2 100644 --- a/Bender.lock +++ b/Bender.lock @@ -108,7 +108,7 @@ packages: dependencies: - common_cells hci: - revision: aed9005c761827c6cbff2ea9a15f9cc37acd1169 + revision: df0540ad43ae3b693c040587a50477558f183374 version: null source: Git: https://github.com/pulp-platform/hci.git @@ -117,8 +117,6 @@ packages: - common_cells - hwpe-stream - l2_tcdm_hybrid_interco - - redundancy_cells - - register_interface hier-icache: revision: 2886cb2a46cea3e2bd2d979b505d88fadfbe150c version: null @@ -182,7 +180,7 @@ packages: dependencies: - common_cells neureka: - revision: 94528df2bc6d5eedc0439bd403c2ad005f0a7519 + revision: 8e0883bd0c19c6844122456c749c2bdfde18011f version: null source: Git: https://github.com/pulp-platform/neureka.git @@ -190,7 +188,6 @@ packages: - hci - hwpe-ctrl - hwpe-stream - - register_interface - zeroriscy per2axi: revision: 95bf23119b47fc171d9ed3734c431f71cffd9350 diff --git a/Bender.yml b/Bender.yml index 0638aa16..dc35a7be 100644 --- a/Bender.yml +++ b/Bender.yml @@ -30,11 +30,11 @@ dependencies: cv32e40p: { git: "https://github.com/pulp-platform/cv32e40p.git", rev: astral-v1.0 } ibex: { git: "https://github.com/pulp-platform/ibex.git", rev: "pulpissimo-v6.1.2" } scm: { git: "https://github.com/pulp-platform/scm.git", rev: v1.2.0 } - hci: { git: "https://github.com/pulp-platform/hci.git", rev: astral-v1.0 } + hci: { git: "https://github.com/pulp-platform/hci.git", rev: df0540ad43ae3b693c040587a50477558f183374 } register_interface: { git: "https://github.com/pulp-platform/register_interface.git", version: 0.4.4 } redundancy_cells: { git: "https://github.com/pulp-platform/redundancy_cells.git", rev: astral-v1.0 } redmule: { git: "https://github.com/pulp-platform/redmule.git", rev: astral-v1.0 } - neureka: { git: 
"https://github.com/pulp-platform/neureka.git", rev: astral-v1.0 } + neureka: { git: "https://github.com/pulp-platform/neureka.git", rev: 8e0883bd0c19c6844122456c749c2bdfde18011f } softex: { git: "https://github.com/belanoa/softex.git" , rev: astral-v1.0 } export_include_dirs: From ab8497551cc3f4dbcb025f3354b8f5c8f4e6b1ee Mon Sep 17 00:00:00 2001 From: Sergio Mazzola Date: Thu, 13 Feb 2025 16:52:02 +0100 Subject: [PATCH 03/67] bender: Update HCI to Astral+pv2 version --- Bender.lock | 2 +- Bender.yml | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/Bender.lock b/Bender.lock index 3bfb16a2..98ff550a 100644 --- a/Bender.lock +++ b/Bender.lock @@ -108,7 +108,7 @@ packages: dependencies: - common_cells hci: - revision: df0540ad43ae3b693c040587a50477558f183374 + revision: 2d5ae7877125a8b5415ae343f1c7c401d4596e15 version: null source: Git: https://github.com/pulp-platform/hci.git diff --git a/Bender.yml b/Bender.yml index dc35a7be..24cad5cc 100644 --- a/Bender.yml +++ b/Bender.yml @@ -30,7 +30,7 @@ dependencies: cv32e40p: { git: "https://github.com/pulp-platform/cv32e40p.git", rev: astral-v1.0 } ibex: { git: "https://github.com/pulp-platform/ibex.git", rev: "pulpissimo-v6.1.2" } scm: { git: "https://github.com/pulp-platform/scm.git", rev: v1.2.0 } - hci: { git: "https://github.com/pulp-platform/hci.git", rev: df0540ad43ae3b693c040587a50477558f183374 } + hci: { git: "https://github.com/pulp-platform/hci.git", rev: 2d5ae7877125a8b5415ae343f1c7c401d4596e15 } # branch: smazzola/chimera register_interface: { git: "https://github.com/pulp-platform/register_interface.git", version: 0.4.4 } redundancy_cells: { git: "https://github.com/pulp-platform/redundancy_cells.git", rev: astral-v1.0 } redmule: { git: "https://github.com/pulp-platform/redmule.git", rev: astral-v1.0 } From adb8d5e0382ce3eef1f448c167ccb714f62c7385 Mon Sep 17 00:00:00 2001 From: Sergio Mazzola Date: Tue, 25 Feb 2025 19:57:38 +0100 Subject: [PATCH 04/67] hw,bender: Update iDMA wrap --- 
Bender.lock | 19 +- Bender.yml | 2 +- bender-common.mk | 2 +- rtl/idma_wrap.sv | 1168 ++++++++++++++++++++++++++++++------------- rtl/pulp_cluster.sv | 14 +- 5 files changed, 845 insertions(+), 360 deletions(-) diff --git a/Bender.lock b/Bender.lock index 98ff550a..91d2c3ab 100644 --- a/Bender.lock +++ b/Bender.lock @@ -48,6 +48,13 @@ packages: - register_interface - scm - tech_cells_generic + axi_stream: + revision: 54891ff40455ca94a37641b9da4604647878cc07 + version: 0.1.1 + source: + Git: https://github.com/pulp-platform/axi_stream.git + dependencies: + - common_cells cluster_interconnect: revision: 1284def6c0b7f7e9355eb093d00883ad9dead1b7 version: null @@ -157,14 +164,16 @@ packages: Git: https://github.com/pulp-platform/icache-intc.git dependencies: [] idma: - revision: 437ffa9dac5dea0daccfd3e8ae604d4f6ae2cdf1 + revision: 4b3393b033b15641b8669970462ff39f4e00d207 version: null source: Git: https://github.com/pulp-platform/iDMA.git dependencies: - axi + - axi_stream - common_cells - common_verification + - obi - register_interface l2_tcdm_hybrid_interco: revision: fa55e72859dcfb117a2788a77352193bef94ff2b @@ -189,6 +198,14 @@ packages: - hwpe-ctrl - hwpe-stream - zeroriscy + obi: + revision: 2ee698211db5c007bd0d207ebd646ce79f0472ec + version: null + source: + Git: https://github.com/pulp-platform/obi.git + dependencies: + - common_cells + - common_verification per2axi: revision: 95bf23119b47fc171d9ed3734c431f71cffd9350 version: null diff --git a/Bender.yml b/Bender.yml index 24cad5cc..fe6856a1 100644 --- a/Bender.yml +++ b/Bender.yml @@ -18,7 +18,7 @@ dependencies: cluster_interconnect: { git: "https://github.com/pulp-platform/cluster_interconnect.git", rev: v1.3.0 } event_unit_flex: { git: "https://github.com/pulp-platform/event_unit_flex.git", rev: astral-v1.0 } mchan: { git: "https://github.com/pulp-platform/mchan.git", rev: v1.2.4 } - idma: { git: "https://github.com/pulp-platform/iDMA.git", rev: v0.6.3 } + idma: { git: 
"https://github.com/pulp-platform/idma.git", rev: "4b3393b033b15641b8669970462ff39f4e00d207" } # branch: smazzola/chimera hier-icache: { git: "https://github.com/pulp-platform/hier-icache.git", rev: v1.3.0 } cluster_icache: { git: "https://github.com/pulp-platform/cluster_icache.git", rev: v0.2.0 } cluster_peripherals: { git: "https://github.com/pulp-platform/cluster_peripherals.git", rev: v2.2.0 } diff --git a/bender-common.mk b/bender-common.mk index ca6c866d..8a1a4a8b 100644 --- a/bender-common.mk +++ b/bender-common.mk @@ -11,7 +11,7 @@ common_defs += -D CLUSTER_ALIAS common_defs += -D USE_PULP_PARAMETERS common_targs += -t rtl -common_targs += -t mchan +# common_targs += -t mchan common_targs += -t cluster_standalone common_targs += -t scm_use_fpga_scm common_targs += -t cv32e40p_use_ff_regfile \ No newline at end of file diff --git a/rtl/idma_wrap.sv b/rtl/idma_wrap.sv index ab4954b0..bcfc0268 100644 --- a/rtl/idma_wrap.sv +++ b/rtl/idma_wrap.sv @@ -3,67 +3,73 @@ // SPDX-License-Identifier: SHL-0.51 /* - * dmac_wrap.sv + * idma_wrap.sv * Thomas Benz * Michael Rogenmoser + * Georg Rutishauser */ // DMA Core wrapper `include "axi/assign.svh" `include "axi/typedef.svh" +`include "obi/typedef.svh" `include "idma/typedef.svh" `include "register_interface/typedef.svh" +`define MY_MAX(a,b) (a > b ? 
a : b) + module dmac_wrap #( - parameter int unsigned NB_CORES = 4, - parameter int unsigned AXI_ADDR_WIDTH = 32, - parameter int unsigned AXI_DATA_WIDTH = 64, - parameter int unsigned AXI_USER_WIDTH = 6, - parameter int unsigned AXI_ID_WIDTH = 4, - parameter int unsigned PE_ID_WIDTH = 1, - parameter int unsigned NB_PE_PORTS = 1, - parameter int unsigned DATA_WIDTH = 32, - parameter int unsigned ADDR_WIDTH = 32, - parameter int unsigned BE_WIDTH = DATA_WIDTH/8, - parameter int unsigned NUM_STREAMS = 1, // Only 1 for now - parameter int unsigned TCDM_SIZE = 0, - parameter int unsigned TwoDMidend = 1, // Leave this on for now - parameter int unsigned NB_OUTSND_BURSTS = 8, - parameter int unsigned GLOBAL_QUEUE_DEPTH = 16, - parameter int unsigned BACKEND_QUEUE_DEPTH = 16, - parameter logic [AXI_ADDR_WIDTH-1:0] ClusterBaseAddr = 'h10000000, - parameter type axi_req_t = logic, - parameter type axi_resp_t = logic -) ( - input logic clk_i, - input logic rst_ni, - input logic test_mode_i, - XBAR_PERIPH_BUS.Slave pe_ctrl_slave[NB_PE_PORTS-1:0], - hci_core_intf.target ctrl_slave[0:NB_CORES-1], - hci_core_intf.initiator tcdm_master[0:3], - output axi_req_t ext_master_req_o, - input axi_resp_t ext_master_resp_i, - output logic [NB_CORES-1:0] term_event_o, - output logic [NB_CORES-1:0] term_irq_o, - output logic [NB_PE_PORTS-1:0] term_event_pe_o, - output logic [NB_PE_PORTS-1:0] term_irq_pe_o, - output logic busy_o -); - - localparam int unsigned NumRegs = NB_CORES+NB_PE_PORTS; - localparam int unsigned MstIdxWidth = AXI_ID_WIDTH; - localparam int unsigned SlvIdxWidth = AXI_ID_WIDTH - $clog2(NUM_STREAMS); + parameter int unsigned NB_CORES = 4, + parameter int unsigned AXI_ADDR_WIDTH = 32, + parameter int unsigned AXI_DATA_WIDTH = 64, + parameter int unsigned AXI_USER_WIDTH = 6, + parameter int unsigned AXI_ID_WIDTH = 4, + parameter int unsigned PE_ID_WIDTH = 1, + parameter int unsigned NB_PE_PORTS = 1, + parameter int unsigned DATA_WIDTH = 32, + parameter int unsigned 
ADDR_WIDTH = 32, + parameter int unsigned BE_WIDTH = DATA_WIDTH / 8, + parameter type axi_req_t = logic, + parameter type axi_resp_t = logic, + // bidirectional streams: range 1 to 8 + parameter int unsigned NUM_BIDIR_STREAMS = 1, + parameter int unsigned NB_OUTSND_BURSTS = 8, + // queue depth per stream + parameter int unsigned GLOBAL_QUEUE_DEPTH = 2, + // mux read ports between tcdm-tcdm and tcdm-axi? + parameter bit MUX_READ = 1'b0, + // 4 ports per stream if read ports muxed, otherwise 6 + localparam int unsigned NB_TCDM_PORTS_PER_STRM = 4 + (!MUX_READ) * 2 +) ( // verilog_format: off // verible does not manage to align this :( + input logic clk_i, + input logic rst_ni, + input logic test_mode_i, + XBAR_PERIPH_BUS.Slave pe_ctrl_slave[NB_PE_PORTS-1:0], + hci_core_intf.target ctrl_slave[0:NB_CORES-1], + hci_core_intf.initiator tcdm_master[NB_TCDM_PORTS_PER_STRM*NUM_BIDIR_STREAMS-1:0], + output axi_req_t [NUM_BIDIR_STREAMS-1:0] ext_master_req_o, + input axi_resp_t [NUM_BIDIR_STREAMS-1:0] ext_master_resp_i, + output logic [NB_CORES-1:0] term_event_o, + output logic [NB_CORES-1:0] term_irq_o, + output logic [NB_PE_PORTS-1:0] term_event_pe_o, + output logic [NB_PE_PORTS-1:0] term_irq_pe_o, + output logic busy_o +); // verilog_format: on + + localparam int unsigned NumRegs = NB_CORES + NB_PE_PORTS; + localparam int unsigned NumStreams = 32'd2 * NUM_BIDIR_STREAMS; + localparam int unsigned StreamWidth = cf_math_pkg::idx_width(NumStreams); // CORE --> MCHAN CTRL INTERFACE BUS SIGNALS - logic [NumRegs-1:0][DATA_WIDTH-1:0] config_wdata; - logic [NumRegs-1:0][ADDR_WIDTH-1:0] config_add; + logic [NumRegs-1:0][ DATA_WIDTH-1:0] config_wdata; + logic [NumRegs-1:0][ ADDR_WIDTH-1:0] config_add; logic [NumRegs-1:0] config_req; logic [NumRegs-1:0] config_wen; - logic [NumRegs-1:0][BE_WIDTH-1:0] config_be; + logic [NumRegs-1:0][ BE_WIDTH-1:0] config_be; logic [NumRegs-1:0][PE_ID_WIDTH-1:0] config_id; logic [NumRegs-1:0] config_gnt; - logic [NumRegs-1:0][DATA_WIDTH-1:0] 
config_r_rdata; + logic [NumRegs-1:0][ DATA_WIDTH-1:0] config_r_rdata; logic [NumRegs-1:0] config_r_valid; logic [NumRegs-1:0] config_r_opc; logic [NumRegs-1:0][PE_ID_WIDTH-1:0] config_r_id; @@ -79,16 +85,16 @@ module dmac_wrap #( assign ctrl_slave[i].gnt = config_gnt[i]; assign ctrl_slave[i].r_opc = config_r_opc[i]; assign ctrl_slave[i].r_valid = config_r_valid[i]; - assign ctrl_slave[i].r_data = config_r_rdata[i]; + assign ctrl_slave[i].r_data = config_r_rdata[i]; end for (genvar i = 0; i < NB_PE_PORTS; i++) begin : gen_pe_ctrl_registers - assign config_add[NB_CORES+i] = pe_ctrl_slave[i].add; - assign config_req[NB_CORES+i] = pe_ctrl_slave[i].req; - assign config_wdata[NB_CORES+i] = pe_ctrl_slave[i].wdata; - assign config_wen[NB_CORES+i] = pe_ctrl_slave[i].wen; - assign config_be[NB_CORES+i] = pe_ctrl_slave[i].be; - assign config_id[NB_CORES+i] = pe_ctrl_slave[i].id; + assign config_add[NB_CORES+i] = pe_ctrl_slave[i].add; + assign config_req[NB_CORES+i] = pe_ctrl_slave[i].req; + assign config_wdata[NB_CORES+i] = pe_ctrl_slave[i].wdata; + assign config_wen[NB_CORES+i] = pe_ctrl_slave[i].wen; + assign config_be[NB_CORES+i] = pe_ctrl_slave[i].be; + assign config_id[NB_CORES+i] = pe_ctrl_slave[i].id; assign pe_ctrl_slave[i].gnt = config_gnt[NB_CORES+i]; assign pe_ctrl_slave[i].r_opc = config_r_opc[NB_CORES+i]; assign pe_ctrl_slave[i].r_valid = config_r_valid[NB_CORES+i]; @@ -96,74 +102,124 @@ module dmac_wrap #( assign pe_ctrl_slave[i].r_id = config_r_id[NB_CORES+i]; end - // AXI4+ATOP types - typedef logic [AXI_ADDR_WIDTH-1:0] addr_t; - typedef logic [ADDR_WIDTH-1:0] mem_addr_t; - typedef logic [AXI_DATA_WIDTH-1:0] data_t; - typedef logic [SlvIdxWidth-1:0] slv_id_t; - typedef logic [MstIdxWidth-1:0] mst_id_t; + // Types types + typedef logic [AXI_ADDR_WIDTH-1:0] addr_t; + typedef logic [ADDR_WIDTH-1:0] mem_addr_t; + typedef logic [AXI_DATA_WIDTH-1:0] data_t; + typedef logic [AXI_ID_WIDTH-1:0] id_t; typedef logic [AXI_DATA_WIDTH/8-1:0] strb_t; - typedef logic 
[AXI_USER_WIDTH-1:0] user_t; - // AXI4+ATOP channels typedefs - `AXI_TYPEDEF_AW_CHAN_T(slv_aw_chan_t, addr_t, slv_id_t, user_t) - `AXI_TYPEDEF_AW_CHAN_T(mst_aw_chan_t, addr_t, mst_id_t, user_t) - `AXI_TYPEDEF_AW_CHAN_T(mem_aw_chan_t, mem_addr_t, mst_id_t, user_t) - `AXI_TYPEDEF_W_CHAN_T(w_chan_t, data_t, strb_t, user_t) - `AXI_TYPEDEF_B_CHAN_T(slv_b_chan_t, slv_id_t, user_t) - `AXI_TYPEDEF_B_CHAN_T(mst_b_chan_t, mst_id_t, user_t) - `AXI_TYPEDEF_AR_CHAN_T(slv_ar_chan_t, addr_t, slv_id_t, user_t) - `AXI_TYPEDEF_AR_CHAN_T(mst_ar_chan_t, addr_t, mst_id_t, user_t) - `AXI_TYPEDEF_AR_CHAN_T(mem_ar_chan_t, mem_addr_t, mst_id_t, user_t) - `AXI_TYPEDEF_R_CHAN_T(slv_r_chan_t, data_t, slv_id_t, user_t) - `AXI_TYPEDEF_R_CHAN_T(mst_r_chan_t, data_t, mst_id_t, user_t) - `AXI_TYPEDEF_REQ_T(slv_req_t, slv_aw_chan_t, w_chan_t, slv_ar_chan_t) - `AXI_TYPEDEF_REQ_T(mst_req_t, mst_aw_chan_t, w_chan_t, mst_ar_chan_t) - `AXI_TYPEDEF_REQ_T(mem_req_t, mem_aw_chan_t, w_chan_t, mem_ar_chan_t) - `AXI_TYPEDEF_RESP_T(slv_resp_t, slv_b_chan_t, slv_r_chan_t) - `AXI_TYPEDEF_RESP_T(mst_resp_t, mst_b_chan_t, mst_r_chan_t) + typedef logic [AXI_USER_WIDTH-1:0] user_t; + + // // AXI4+ATOP channels typedefs + //`AXI_TYPEDEF_ALL(axi_int, addr_t, id_t, data_t, strb_t, user_t) + `AXI_TYPEDEF_AW_CHAN_T(axi_aw_chan_t, addr_t, id_t, user_t) + `AXI_TYPEDEF_W_CHAN_T(axi_w_chan_t, data_t, strb_t, user_t) + `AXI_TYPEDEF_B_CHAN_T(axi_b_chan_t, id_t, user_t) + `AXI_TYPEDEF_AR_CHAN_T(axi_ar_chan_t, addr_t, id_t, user_t) + `AXI_TYPEDEF_R_CHAN_T(axi_r_chan_t, data_t, id_t, user_t) + // Memory Init typedefs + /// init read request + typedef struct packed { + logic [AXI_ADDR_WIDTH-1:0] cfg; + logic [AXI_DATA_WIDTH-1:0] term; + logic [AXI_DATA_WIDTH/8-1:0] strb; + logic [AXI_ID_WIDTH-1:0] id; + } init_req_chan_t; + + typedef struct packed { + init_req_chan_t req_chan; + logic req_valid; + logic rsp_ready; + } init_req_t; + + typedef struct packed {logic [AXI_DATA_WIDTH-1:0] init;} init_rsp_chan_t; + + typedef struct 
packed { + init_rsp_chan_t rsp_chan; + logic rsp_valid; + logic req_ready; + } init_rsp_t; + + // OBI typedefs + `OBI_TYPEDEF_MINIMAL_A_OPTIONAL(a_optional_t) + `OBI_TYPEDEF_MINIMAL_R_OPTIONAL(r_optional_t) + `OBI_TYPEDEF_A_CHAN_T(obi_a_chan_t, AXI_ADDR_WIDTH, AXI_DATA_WIDTH, 0, a_optional_t) + `OBI_TYPEDEF_R_CHAN_T(obi_r_chan_t, AXI_DATA_WIDTH, 0, r_optional_t) + `OBI_TYPEDEF_REQ_T(obi_req_t, obi_a_chan_t) + `OBI_TYPEDEF_RSP_T(obi_rsp_t, obi_r_chan_t) + + + obi_req_t [NUM_BIDIR_STREAMS-1:0] + obi_read_req_from_dma, obi_read_req_from_rrc, obi_reorg_req_from_dma, obi_reorg_req_from_rrc, obi_write_req_from_dma, obi_write_req_from_rrc, obi_read_req_muxed; + obi_rsp_t [NUM_BIDIR_STREAMS-1:0] + obi_read_rsp_to_dma, obi_read_rsp_to_rrc, obi_reorg_rsp_to_dma, obi_reorg_rsp_to_rrc, obi_write_rsp_to_dma, obi_write_rsp_to_rrc, obi_read_rsp_to_mux; + + // BUS definitions - mst_req_t tcdm_req, soc_req; - mem_req_t tcdm_mem_req; - mst_resp_t soc_rsp; - mst_resp_t tcdm_rsp; - slv_req_t [NUM_STREAMS-1:0] dma_req; - slv_resp_t [NUM_STREAMS-1:0] dma_rsp; + axi_req_t [NUM_BIDIR_STREAMS-1:0] soc_req; + axi_resp_t [NUM_BIDIR_STREAMS-1:0] soc_rsp; + axi_req_t [ NumStreams-1:0] dma_req; + axi_resp_t [ NumStreams-1:0] dma_rsp; + // interface to structs - //`AXI_ASSIGN_FROM_REQ(ext_master_req_o, soc_req) - //`AXI_ASSIGN_TO_RESP(soc_rsp, ext_master_resp_i) + for (genvar s = 0; s < NUM_BIDIR_STREAMS; s++) begin : gen_connect_interface + assign ext_master_req_o[s] = soc_req[s]; + assign soc_rsp[s] = ext_master_resp_i[s]; + end - `AXI_ASSIGN_REQ_STRUCT(ext_master_req_o, soc_req) - `AXI_ASSIGN_RESP_STRUCT(soc_rsp, ext_master_resp_i) + // connect RW axi buses + for (genvar s = 0; s < NUM_BIDIR_STREAMS; s++) begin : gen_rw_axi_connection + axi_rw_join #( + .axi_req_t (axi_req_t), + .axi_resp_t(axi_resp_t) + ) i_init_axi_rw_join ( + .clk_i, + .rst_ni, + .slv_read_req_i (dma_req[2*s+1]), + .slv_read_resp_o (dma_rsp[2*s+1]), + .slv_write_req_i (dma_req[2*s]), + .slv_write_resp_o(dma_rsp[2*s]), + 
.mst_req_o (soc_req[s]), + .mst_resp_i (soc_rsp[s]) + ); + end // Register BUS definitions - `REG_BUS_TYPEDEF_ALL(dma_regs, logic[9:0], logic[31:0], logic[3:0]) + localparam int unsigned RegAddrWidth = 32'd10; + `REG_BUS_TYPEDEF_ALL(dma_regs, logic[RegAddrWidth-1:0], logic[DATA_WIDTH-1:0], + logic[BE_WIDTH-1:0]) dma_regs_req_t [NumRegs-1:0] dma_regs_req; dma_regs_rsp_t [NumRegs-1:0] dma_regs_rsp; // iDMA struct definitions - localparam int unsigned TFLenWidth = AXI_ADDR_WIDTH; - localparam int unsigned NumDim = 2; // Support 2D midend for 2D transfers - localparam int unsigned RepWidth = 32; - localparam int unsigned StrideWidth = 32; - typedef logic [TFLenWidth-1:0] tf_len_t; - typedef logic [RepWidth-1:0] reps_t; + localparam int unsigned TFLenWidth = AXI_ADDR_WIDTH; + localparam int unsigned NumDim = 32'd3; // Support 2D midend for 2D transfers + localparam int unsigned RepWidth = 32'd32; + localparam int unsigned StrideWidth = 32'd32; + typedef logic [TFLenWidth-1:0] tf_len_t; + typedef logic [RepWidth-1:0] reps_t; typedef logic [StrideWidth-1:0] strides_t; // iDMA request / response types - `IDMA_TYPEDEF_FULL_REQ_T(idma_req_t, slv_id_t, addr_t, tf_len_t) + `IDMA_TYPEDEF_FULL_REQ_T(idma_req_t, id_t, addr_t, tf_len_t) `IDMA_TYPEDEF_FULL_RSP_T(idma_rsp_t, addr_t) // iDMA ND request `IDMA_TYPEDEF_FULL_ND_REQ_T(idma_nd_req_t, idma_req_t, reps_t, strides_t) - idma_nd_req_t twod_req, twod_req_queue; - idma_req_t burst_req; - idma_rsp_t idma_rsp; + logic [StreamWidth-1:0] stream_idx; + + idma_nd_req_t [NumStreams-1:0] twod_req, twod_req_queue; + idma_req_t [NumStreams-1:0] idma_req; + idma_rsp_t [NumStreams-1:0] idma_rsp; + + logic one_fe_valid; + logic [NumStreams-1:0] fe_valid, twod_queue_valid, be_valid, be_rsp_valid; + logic [NumStreams-1:0] fe_ready, twod_queue_ready, be_ready, be_rsp_ready; + logic [NumStreams-1:0] trans_complete, midend_busy; + idma_pkg::idma_busy_t [NumStreams-1:0] idma_busy; - logic fe_valid, twod_queue_valid, be_valid, be_rsp_valid; - logic 
fe_ready, twod_queue_ready, be_ready, be_rsp_ready; - logic trans_complete, midend_busy; - idma_pkg::idma_busy_t idma_busy; + logic [31:0][NumStreams-1:0] done_id, next_id; // ------------------------------------------------------ // FRONTEND @@ -171,291 +227,699 @@ module dmac_wrap #( for (genvar i = 0; i < NumRegs; i++) begin : gen_core_regs periph_to_reg #( - .AW ( 10 ), - .DW ( 32 ), - .BW ( 8 ), - .IW ( PE_ID_WIDTH ), - .req_t ( dma_regs_req_t ), - .rsp_t ( dma_regs_rsp_t ) + .AW (RegAddrWidth), + .DW (DATA_WIDTH), + .BW (BE_WIDTH), + .IW (PE_ID_WIDTH), + .req_t(dma_regs_req_t), + .rsp_t(dma_regs_rsp_t) ) i_pe_translate ( .clk_i, .rst_ni, - .req_i ( config_req [i] ), - .add_i ( config_add [i][9:0] ), - .wen_i ( config_wen [i] ), - .wdata_i ( config_wdata [i] ), - .be_i ( config_be [i] ), - .id_i ( config_id [i] ), - .gnt_o ( config_gnt [i] ), - .r_rdata_o ( config_r_rdata [i] ), - .r_opc_o ( config_r_opc [i] ), - .r_id_o ( config_r_id [i] ), - .r_valid_o ( config_r_valid [i] ), - .reg_req_o ( dma_regs_req [i] ), - .reg_rsp_i ( dma_regs_rsp [i] ) + .req_i (config_req[i]), + .add_i (config_add[i][RegAddrWidth-1:0]), + .wen_i (config_wen[i]), + .wdata_i (config_wdata[i]), + .be_i (config_be[i]), + .id_i (config_id[i]), + .gnt_o (config_gnt[i]), + .r_rdata_o(config_r_rdata[i]), + .r_opc_o (config_r_opc[i]), + .r_id_o (config_r_id[i]), + .r_valid_o(config_r_valid[i]), + .reg_req_o(dma_regs_req[i]), + .reg_rsp_i(dma_regs_rsp[i]) ); end - idma_reg32_2d_frontend #( - .NumRegs ( NumRegs ), - .IdCounterWidth ( 28 ), - .dma_regs_req_t ( dma_regs_req_t ), - .dma_regs_rsp_t ( dma_regs_rsp_t ), - .burst_req_t ( idma_nd_req_t ) - ) i_idma_reg32_2d_frontend ( + idma_reg32_3d #( + .NumRegs (NumRegs), + .NumStreams (NumStreams), + .IdCounterWidth(32'd32), + .reg_req_t (dma_regs_req_t), + .reg_rsp_t (dma_regs_rsp_t), + .dma_req_t (idma_nd_req_t) + ) i_idma_reg32_3d ( .clk_i, .rst_ni, - .dma_ctrl_req_i ( dma_regs_req ), - .dma_ctrl_rsp_o ( dma_regs_rsp ), - .burst_req_o ( 
twod_req ), - .valid_o ( fe_valid ), - .ready_i ( fe_ready ), - .backend_idle_i ( ~busy_o ), - .trans_complete_i ( trans_complete ) + .dma_ctrl_req_i(dma_regs_req), + .dma_ctrl_rsp_o(dma_regs_rsp), + .dma_req_o (twod_req), + .req_valid_o (one_fe_valid), + .req_ready_i (fe_ready[stream_idx]), + .next_id_i (next_id[stream_idx]), + .stream_idx_o (stream_idx), + .done_id_i (done_id), + .busy_i (idma_busy), + .midend_busy_i (midend_busy) ); + always_comb begin : proc_connect_valids + fe_valid = '0; + fe_valid[stream_idx] = one_fe_valid; + end + // interrupts and events (currently broadcast tx_cplt event only) assign term_event_pe_o = |trans_complete ? '1 : '0; assign term_irq_pe_o = '0; assign term_event_o = |trans_complete ? '1 : '0; assign term_irq_o = '0; - assign busy_o = midend_busy | |idma_busy; + assign busy_o = |midend_busy | |idma_busy; - // ------------------------------------------------------ - // MIDEND - // ------------------------------------------------------ + for (genvar s = 0; s < NumStreams; s++) begin : gen_streams - // global (2D) request FIFO - stream_fifo #( - .DEPTH ( GLOBAL_QUEUE_DEPTH ), - .T (idma_nd_req_t ) - ) i_2D_request_fifo ( - .clk_i, - .rst_ni, - .flush_i ( 1'b0 ), - .testmode_i ( test_mode_i ), - .usage_o (/*NOT CONNECTED*/), - - .data_i ( twod_req ), - .valid_i ( fe_valid ), - .ready_o ( fe_ready ), - - .data_o ( twod_req_queue ), - .valid_o ( twod_queue_valid ), - .ready_i ( twod_queue_ready ) - ); - - localparam logic [1:0][31:0] RepWidths = '{default: 32'd32}; + // ------------------------------------------------------ + // ID counters + // ------------------------------------------------------ + idma_transfer_id_gen #( + .IdWidth(32'd32) + ) i_idma_transfer_id_gen ( + .clk_i, + .rst_ni, + .issue_i (fe_valid[s] & fe_ready[s]), + .retire_i (trans_complete[s]), + .next_o (next_id[s]), + .completed_o(done_id[s]) + ); - idma_nd_midend #( - .NumDim ( NumDim ), - .addr_t ( addr_t ), - .idma_req_t ( idma_req_t ), - .idma_rsp_t ( 
idma_rsp_t ), - .idma_nd_req_t( idma_nd_req_t ), - .RepWidths ( RepWidths ) - ) i_idma_2D_midend ( - .clk_i, - .rst_ni, - .nd_req_i ( twod_req_queue ), - .nd_req_valid_i ( twod_queue_valid ), - .nd_req_ready_o ( twod_queue_ready ), + // ------------------------------------------------------ + // MIDEND + // ------------------------------------------------------ + // global (2D) request FIFO + stream_fifo #( + .DEPTH(GLOBAL_QUEUE_DEPTH), + .T (idma_nd_req_t) + ) i_3D_request_fifo ( + .clk_i, + .rst_ni, + .flush_i (1'b0), + .testmode_i(test_mode_i), + .usage_o ( /*NOT CONNECTED*/), + .data_i (twod_req), + .valid_i (fe_valid[s]), + .ready_o (fe_ready[s]), + .data_o (twod_req_queue[s]), + .valid_o (twod_queue_valid[s]), + .ready_i (twod_queue_ready[s]) + ); - .nd_rsp_o (/*NOT CONNECTED*/ ), - .nd_rsp_valid_o ( trans_complete ), - .nd_rsp_ready_i ( 1'b1 ), // Always ready to accept completed transfers + localparam logic [1:0][31:0] RepWidths = '{default: 32'd32}; - .burst_req_o ( burst_req ), - .burst_req_valid_o( be_valid ), - .burst_req_ready_i( be_ready ), + idma_nd_midend #( + .NumDim (NumDim), + .addr_t (addr_t), + .idma_req_t (idma_req_t), + .idma_rsp_t (idma_rsp_t), + .idma_nd_req_t(idma_nd_req_t), + .RepWidths (RepWidths) + ) i_idma_3D_midend ( + .clk_i, + .rst_ni, + .nd_req_i (twod_req_queue[s]), + .nd_req_valid_i (twod_queue_valid[s]), + .nd_req_ready_o (twod_queue_ready[s]), + .nd_rsp_o ( /*NOT CONNECTED*/), + .nd_rsp_valid_o (trans_complete[s]), + .nd_rsp_ready_i (1'b1), // Always ready to accept completed transfers + .burst_req_o (idma_req[s]), + .burst_req_valid_o(be_valid[s]), + .burst_req_ready_i(be_ready[s]), + .burst_rsp_i (idma_rsp[s]), + .burst_rsp_valid_i(be_rsp_valid[s]), + .burst_rsp_ready_o(be_rsp_ready[s]), + .busy_o (midend_busy[s]) + ); - .burst_rsp_i ( idma_rsp ), - .burst_rsp_valid_i( be_rsp_valid ), - .burst_rsp_ready_o( be_rsp_ready ), + // ------------------------------------------------------ + // BACKEND + // 
------------------------------------------------------ + + // even channels: copy out data + if (s[0] == 1'b0) begin : gen_cpy_out + + // Meta Channel Widths + localparam int unsigned axi_aw_chan_width = axi_pkg::aw_width( + AXI_ADDR_WIDTH, AXI_ID_WIDTH, AXI_USER_WIDTH + ); + localparam int unsigned init_req_chan_width = $bits(init_req_chan_t); + localparam int unsigned obi_a_chan_width = $bits(obi_a_chan_t); + + + typedef struct packed { + init_req_chan_t req_chan; + logic [`MY_MAX(init_req_chan_width, obi_a_chan_width)-init_req_chan_width:0] padding; + } init_read_req_chan_padded_t; + + typedef struct packed { + obi_a_chan_t a_chan; + logic [`MY_MAX(init_req_chan_width, obi_a_chan_width)-obi_a_chan_width:0] padding; + } obi_read_a_chan_padded_t; + + typedef union packed { + init_read_req_chan_padded_t init; + obi_read_a_chan_padded_t obi; + } read_meta_channel_t; + + typedef struct packed { + axi_aw_chan_t aw_chan; + logic [`MY_MAX(axi_aw_chan_width, init_req_chan_width)-axi_aw_chan_width:0] padding; + } axi_write_aw_chan_padded_t; + + typedef struct packed { + init_req_chan_t req_chan; + logic [`MY_MAX(axi_aw_chan_width, init_req_chan_width)-init_req_chan_width:0] padding; + } init_write_req_chan_padded_t; + + typedef union packed { + axi_write_aw_chan_padded_t axi; + init_write_req_chan_padded_t init; + } write_meta_channel_t; + + // local buses + init_req_t init_read_req, init_write_req; + init_rsp_t init_read_rsp, init_write_rsp; + + idma_backend_r_obi_rw_init_w_axi #( + .DataWidth (AXI_DATA_WIDTH), + .AddrWidth (AXI_ADDR_WIDTH), + .UserWidth (AXI_USER_WIDTH), + .AxiIdWidth (AXI_ID_WIDTH), + .NumAxInFlight (NB_OUTSND_BURSTS), + .BufferDepth (32'd3), + .TFLenWidth (TFLenWidth), + .MemSysDepth (32'd0), + .CombinedShifter (1'b0), + .RAWCouplingAvail (1'b0), + .MaskInvalidData (1'b0), + .HardwareLegalizer (1'b1), + .RejectZeroTransfers (1'b1), + .idma_req_t (idma_req_t), + .idma_rsp_t (idma_rsp_t), + .idma_eh_req_t (idma_pkg::idma_eh_req_t), + .idma_busy_t 
(idma_pkg::idma_busy_t), + .axi_req_t (axi_req_t), + .axi_rsp_t (axi_resp_t), + .init_req_t (init_req_t), + .init_rsp_t (init_rsp_t), + .obi_req_t (obi_req_t), + .obi_rsp_t (obi_rsp_t), + .read_meta_channel_t (read_meta_channel_t), + .write_meta_channel_t(write_meta_channel_t) + ) i_idma_backend_r_obi_rw_init_w_axi ( + .clk_i, + .rst_ni, + .testmode_i (test_mode_i), + .idma_req_i (idma_req[s]), + .req_valid_i (be_valid[s]), + .req_ready_o (be_ready[s]), + .idma_rsp_o (idma_rsp[s]), + .rsp_valid_o (be_rsp_valid[s]), + .rsp_ready_i (be_rsp_ready[s]), + .idma_eh_req_i ('b0), + .eh_req_valid_i (1'b0), + .eh_req_ready_o ( /* NOT CONNECTED */), + .init_read_req_o (init_read_req), + .init_read_rsp_i (init_read_rsp), + .obi_read_req_o (obi_read_req_from_dma[s/2]), + .obi_read_rsp_i (obi_read_rsp_to_dma[s/2]), + .axi_write_req_o (dma_req[s]), + .axi_write_rsp_i (dma_rsp[s]), + .init_write_req_o(init_write_req), + .init_write_rsp_i(init_write_rsp), + .busy_o (idma_busy[s]) + ); + + // use a spill register to only give responses when a request was + // (or is) asserted + spill_register #( + .T(logic[-1:0]) + ) + i_init_read_rsp_reflect ( + .clk_i, + .rst_ni, + .valid_i(init_read_req.req_valid), + .ready_o(init_read_rsp.req_ready), + .data_i('0), // not used + .valid_o(init_read_rsp.rsp_valid), + .ready_i(init_read_req.rsp_ready), + .data_o() + ); + + //implement zero memory using init protocol + assign init_read_rsp.rsp_chan.init = '0; + // implement /dev/null + spill_register #( + .T(logic[-1:0]) + ) + i_init_write_rsp_reflect ( + .clk_i, + .rst_ni, + .valid_i(init_write_req.req_valid), + .ready_o(init_write_rsp.req_ready), + .data_i('0), // not used + .valid_o(init_write_rsp.rsp_valid), + .ready_i(init_write_req.rsp_ready), + .data_o() + ); + + assign init_write_rsp.rsp_chan.init = '0; + + // odd channels: copy in data + end else begin : gen_cpy_in + + // Meta Channel Widths + localparam int unsigned axi_ar_chan_width = axi_pkg::ar_width( + AXI_ADDR_WIDTH, AXI_ID_WIDTH, 
AXI_USER_WIDTH + ); + localparam int unsigned init_req_chan_width = $bits(init_req_chan_t); + localparam int unsigned obi_a_chan_width = $bits(obi_a_chan_t); + + function int unsigned max_width(input int unsigned a, b); + return (a > b) ? a : b; + endfunction + + typedef struct packed { + axi_ar_chan_t ar_chan; + logic [`MY_MAX( +axi_ar_chan_width, `MY_MAX(init_req_chan_width, obi_a_chan_width) +)-axi_ar_chan_width:0] padding; + } axi_read_ar_chan_padded_t; + + typedef struct packed { + init_req_chan_t req_chan; + logic [`MY_MAX(axi_ar_chan_width, `MY_MAX(init_req_chan_width, obi_a_chan_width))-init_req_chan_width:0] padding; + } init_read_req_chan_padded_t; + + typedef struct packed { + obi_a_chan_t a_chan; + logic [`MY_MAX(axi_ar_chan_width, `MY_MAX(init_req_chan_width, obi_a_chan_width))-obi_a_chan_width:0] padding; + } obi_read_a_chan_padded_t; + + typedef union packed { + axi_read_ar_chan_padded_t axi; + init_read_req_chan_padded_t init; + obi_read_a_chan_padded_t obi; + } read_meta_channel_t; + + typedef struct packed { + init_req_chan_t req_chan; + logic [`MY_MAX(init_req_chan_width, obi_a_chan_width)-init_req_chan_width:0] padding; + } init_write_req_chan_padded_t; + + typedef struct packed { + obi_a_chan_t a_chan; + logic [`MY_MAX(init_req_chan_width, obi_a_chan_width)-obi_a_chan_width:0] padding; + } obi_write_a_chan_padded_t; + + typedef union packed { + init_write_req_chan_padded_t init; + obi_write_a_chan_padded_t obi; + } write_meta_channel_t; + + // local buses + init_req_t init_read_req, init_write_req; + init_rsp_t init_read_rsp, init_write_rsp; + + idma_backend_r_axi_rw_init_rw_obi #( + .DataWidth (AXI_DATA_WIDTH), + .AddrWidth (AXI_ADDR_WIDTH), + .UserWidth (AXI_USER_WIDTH), + .AxiIdWidth (AXI_ID_WIDTH), + .NumAxInFlight (NB_OUTSND_BURSTS), + .BufferDepth (32'd3), + .TFLenWidth (TFLenWidth), + .MemSysDepth (32'd0), + .CombinedShifter (1'b0), + .RAWCouplingAvail (1'b0), + .MaskInvalidData (1'b0), + .HardwareLegalizer (1'b1), + .RejectZeroTransfers 
(1'b1), + .idma_req_t (idma_req_t), + .idma_rsp_t (idma_rsp_t), + .idma_eh_req_t (idma_pkg::idma_eh_req_t), + .idma_busy_t (idma_pkg::idma_busy_t), + .axi_req_t (axi_req_t), + .axi_rsp_t (axi_resp_t), + .init_req_t (init_req_t), + .init_rsp_t (init_rsp_t), + .obi_req_t (obi_req_t), + .obi_rsp_t (obi_rsp_t), + .read_meta_channel_t (read_meta_channel_t), + .write_meta_channel_t(write_meta_channel_t) + ) i_idma_backend_r_axi_rw_init_rw_obi ( + .clk_i, + .rst_ni, + .testmode_i (test_mode_i), + .idma_req_i (idma_req[s]), + .req_valid_i (be_valid[s]), + .req_ready_o (be_ready[s]), + .idma_rsp_o (idma_rsp[s]), + .rsp_valid_o (be_rsp_valid[s]), + .rsp_ready_i (be_rsp_ready[s]), + .idma_eh_req_i ('b0), + .eh_req_valid_i (1'b0), + .eh_req_ready_o ( /* NOT CONNECTED */), + .axi_read_req_o (dma_req[s]), + .axi_read_rsp_i (dma_rsp[s]), + .init_read_req_o (init_read_req), + .init_read_rsp_i (init_read_rsp), + .obi_read_req_o (obi_reorg_req_from_dma[s/2]), + .obi_read_rsp_i (obi_reorg_rsp_to_dma[s/2]), + .init_write_req_o(init_write_req), + .init_write_rsp_i(init_write_rsp), + .obi_write_req_o (obi_write_req_from_dma[s/2]), + .obi_write_rsp_i (obi_write_rsp_to_dma[s/2]), + .busy_o (idma_busy[s]) + ); + + // use a spill register to only give responses when a request was + // (or is) asserted + spill_register #( + .T(logic[-1:0]) + ) + i_init_read_rsp_reflect ( + .clk_i, + .rst_ni, + .valid_i(init_read_req.req_valid), + .ready_o(init_read_rsp.req_ready), + .data_i('0), // not used + .valid_o(init_read_rsp.rsp_valid), + .ready_i(init_read_req.rsp_ready), + .data_o() + ); + //implement zero memory using init protocol + assign init_read_rsp.rsp_chan.init = '0; + // implement /dev/null + spill_register #( + .T(logic[-1:0]) + ) + i_init_write_rsp_reflect ( + .clk_i, + .rst_ni, + .valid_i(init_write_req.req_valid), + .ready_o(init_write_rsp.req_ready), + .data_i('0), // not used + .valid_o(init_write_rsp.rsp_valid), + .ready_i(init_write_req.rsp_ready), + .data_o() + ); + assign 
init_write_rsp.rsp_chan.init = '0; + end : gen_cpy_in + end : gen_streams - .busy_o ( midend_busy ) - ); // ------------------------------------------------------ - // BACKEND + // MUX read OBI connections if specified // ------------------------------------------------------ + for (genvar s = 0; s < NUM_BIDIR_STREAMS; s++) begin + if (MUX_READ) begin + localparam obi_pkg::obi_cfg_t sbr_obi_cfg = '{ + UseRReady: 1'b1, + CombGnt: 1'b0, + AddrWidth: AXI_ADDR_WIDTH, + DataWidth: AXI_DATA_WIDTH, + IdWidth: 0, + Integrity: 1'b0, + BeFull: 1'b1, + OptionalCfg: obi_pkg::ObiMinimalOptionalConfig + }; + + // iDMA OBI + + obi_mux #( + .SbrPortObiCfg (sbr_obi_cfg), + .MgrPortObiCfg (sbr_obi_cfg), + .sbr_port_obi_req_t(obi_req_t), + .sbr_port_a_chan_t (obi_a_chan_t), + .sbr_port_obi_rsp_t(obi_rsp_t), + .sbr_port_r_chan_t (obi_r_chan_t), + .mgr_port_obi_req_t(obi_req_t), + .mgr_port_obi_rsp_t(obi_rsp_t), + .NumSbrPorts (2), + .NumMaxTrans (2), + .UseIdForRouting (1'b0) + ) obi_read_mux_i ( + .clk_i, + .rst_ni, + .testmode_i (test_mode_i), + .sbr_ports_req_i({obi_reorg_req_from_dma[s], obi_read_req_from_dma[s]}), + .sbr_ports_rsp_o({obi_reorg_rsp_to_dma[s], obi_read_rsp_to_dma[s]}), + .mgr_port_req_o (obi_read_req_muxed[s]), + .mgr_port_rsp_i (obi_read_rsp_to_mux[s]) + ); + assign obi_reorg_req_from_rrc = '0; + assign obi_reorg_rsp_to_rrc = '0; + end else begin // if (MUX_READ) + // pass through the read req/rsp from/to dma + assign obi_read_req_muxed = obi_read_req_from_dma; + assign obi_read_rsp_to_dma = obi_read_rsp_to_mux; + + obi_rready_converter #( + .obi_a_chan_t(obi_a_chan_t), + .obi_r_chan_t(obi_r_chan_t), + .DEPTH(1) + ) + obi_rready_converter_reorg_i ( + .clk_i, + .rst_ni, + .test_mode_i, + .sbr_a_chan_i(obi_reorg_req_from_dma[s].a), + .req_i(obi_reorg_req_from_dma[s].req), + .gnt_o(obi_reorg_rsp_to_dma[s].gnt), + .rready_i(obi_reorg_req_from_dma[s].rready), + .sbr_r_chan_o(obi_reorg_rsp_to_dma[s].r), + .rvalid_o(obi_reorg_rsp_to_dma[s].rvalid), + 
.mgr_a_chan_o(obi_reorg_req_from_rrc[s].a), + .req_o(obi_reorg_req_from_rrc[s].req), + .rready_o(obi_reorg_req_from_rrc[s].rready), + .mgr_r_chan_i(obi_reorg_rsp_to_rrc[s].r), + .gnt_i(obi_reorg_rsp_to_rrc[s].gnt), + .rvalid_i(obi_reorg_rsp_to_rrc[s].rvalid) + ); + end // else: !if(MUX_READ) + + obi_rready_converter #( + .obi_a_chan_t(obi_a_chan_t), + .obi_r_chan_t(obi_r_chan_t), + .DEPTH(1) + ) + obi_rready_converter_read_i ( + .clk_i, + .rst_ni, + .test_mode_i, + .sbr_a_chan_i(obi_read_req_muxed[s].a), + .req_i(obi_read_req_muxed[s].req), + .gnt_o(obi_read_rsp_to_mux[s].gnt), + .rready_i(obi_read_req_muxed[s].rready), + .sbr_r_chan_o(obi_read_rsp_to_mux[s].r), + .rvalid_o(obi_read_rsp_to_mux[s].rvalid), + .mgr_a_chan_o(obi_read_req_from_rrc[s].a), + .req_o(obi_read_req_from_rrc[s].req), + .rready_o(obi_read_req_from_rrc[s].rready), + .mgr_r_chan_i(obi_read_rsp_to_rrc[s].r), + .gnt_i(obi_read_rsp_to_rrc[s].gnt), + .rvalid_i(obi_read_rsp_to_rrc[s].rvalid) + ); + + + obi_rready_converter #( + .obi_a_chan_t(obi_a_chan_t), + .obi_r_chan_t(obi_r_chan_t), + .DEPTH(1) + ) + obi_rready_converter_wr_i ( + .clk_i, + .rst_ni, + .test_mode_i, + .sbr_a_chan_i(obi_write_req_from_dma[s].a), + .req_i(obi_write_req_from_dma[s].req), + .gnt_o(obi_write_rsp_to_dma[s].gnt), + .rready_i(obi_write_req_from_dma[s].rready), + .sbr_r_chan_o(obi_write_rsp_to_dma[s].r), + .rvalid_o(obi_write_rsp_to_dma[s].rvalid), + .mgr_a_chan_o(obi_write_req_from_rrc[s].a), + .req_o(obi_write_req_from_rrc[s].req), + .rready_o(obi_write_req_from_rrc[s].rready), + .mgr_r_chan_i(obi_write_rsp_to_rrc[s].r), + .gnt_i(obi_write_rsp_to_rrc[s].gnt), + .rvalid_i(obi_write_rsp_to_rrc[s].rvalid) + ); + end - idma_backend #( - .DataWidth ( AXI_DATA_WIDTH ), - .AddrWidth ( AXI_ADDR_WIDTH ), - .UserWidth ( AXI_USER_WIDTH ), - .AxiIdWidth ( AXI_ID_WIDTH ), - .NumAxInFlight ( NB_OUTSND_BURSTS ), - .BufferDepth ( 3 ), - .TFLenWidth ( TFLenWidth ), - .RAWCouplingAvail ( 1'b1 ), - .MemSysDepth ( 32'd0 ), - .MaskInvalidData 
( 1'b1 ), - .HardwareLegalizer ( 1'b1 ), - .RejectZeroTransfers ( 1'b1 ), - .ErrorCap ( idma_pkg::NO_ERROR_HANDLING ), - .idma_req_t ( idma_req_t ), - .idma_rsp_t ( idma_rsp_t ), - .idma_eh_req_t ( idma_pkg::idma_eh_req_t ), - .idma_busy_t ( idma_pkg::idma_busy_t ), - .protocol_req_t ( slv_req_t ), - .protocol_rsp_t ( slv_resp_t ), - .aw_chan_t ( slv_aw_chan_t ), - .ar_chan_t ( slv_ar_chan_t ) - ) i_idma_backend ( - .clk_i, - .rst_ni, - .testmode_i ( test_mode_i ), - - .idma_req_i ( burst_req ), - .req_valid_i ( be_valid ), - .req_ready_o ( be_ready ), - - .idma_rsp_o ( idma_rsp ), - .rsp_valid_o ( be_rsp_valid ), - .rsp_ready_i ( be_rsp_ready ), - - .idma_eh_req_i ( '0 ), // No error handling - .eh_req_valid_i ( 1'b1 ), - .eh_req_ready_o (/*NOT CONNECTED*/), - - .protocol_req_o ( dma_req ), - .protocol_rsp_i ( dma_rsp ), - .busy_o ( idma_busy ) - ); // ------------------------------------------------------ - // AXI connection to EXT/TCDM + // TCDM connections // ------------------------------------------------------ + for (genvar s = 0; s < NUM_BIDIR_STREAMS; s++) begin + + logic tcdm_master_we_0; + logic tcdm_master_we_1; + logic tcdm_master_we_2; + logic tcdm_master_we_3; + logic tcdm_master_we_4; + logic tcdm_master_we_5; + + mem_to_banks #( + .AddrWidth(AXI_ADDR_WIDTH), + .DataWidth(AXI_DATA_WIDTH), + .NumBanks (32'd2), + .HideStrb (1'b1), + .MaxTrans (32'd1), + .FifoDepth(32'd1) + ) i_mem_to_banks_read ( + .clk_i, + .rst_ni, + .req_i(obi_read_req_from_rrc[s].req), + .gnt_o(obi_read_rsp_to_rrc[s].gnt), + .addr_i(obi_read_req_from_rrc[s].a.addr), + .wdata_i(obi_read_req_from_rrc[s].a.wdata), + .strb_i(obi_read_req_from_rrc[s].a.be), + .atop_i('0), + .we_i(obi_read_req_from_rrc[s].a.we), + .rvalid_o(obi_read_rsp_to_rrc[s].rvalid), + .rdata_o(obi_read_rsp_to_rrc[s].r.rdata), + .bank_req_o({ + tcdm_master[NB_TCDM_PORTS_PER_STRM*s+3].req, tcdm_master[NB_TCDM_PORTS_PER_STRM*s+2].req + }), + .bank_gnt_i({ + tcdm_master[NB_TCDM_PORTS_PER_STRM*s+3].gnt, 
tcdm_master[NB_TCDM_PORTS_PER_STRM*s+2].gnt + }), + .bank_addr_o({ + tcdm_master[NB_TCDM_PORTS_PER_STRM*s+3].add, tcdm_master[NB_TCDM_PORTS_PER_STRM*s+2].add + }), + .bank_wdata_o({ + tcdm_master[NB_TCDM_PORTS_PER_STRM*s+3].data, tcdm_master[NB_TCDM_PORTS_PER_STRM*s+2].data + }), + .bank_strb_o({ + tcdm_master[NB_TCDM_PORTS_PER_STRM*s+3].be, tcdm_master[NB_TCDM_PORTS_PER_STRM*s+2].be + }), + .bank_atop_o( /* NOT CONNECTED */), + .bank_we_o({tcdm_master_we_3, tcdm_master_we_2}), + .bank_rvalid_i({ + tcdm_master[NB_TCDM_PORTS_PER_STRM*s+3].r_valid, + tcdm_master[NB_TCDM_PORTS_PER_STRM*s+2].r_valid + }), + .bank_rdata_i({ + tcdm_master[NB_TCDM_PORTS_PER_STRM*s+3].r_data, + tcdm_master[NB_TCDM_PORTS_PER_STRM*s+2].r_data + }) + ); - // xbar - localparam int unsigned NumRules = 3; - typedef struct packed { - int unsigned idx; - logic [AXI_ADDR_WIDTH-1:0] start_addr; - logic [AXI_ADDR_WIDTH-1:0] end_addr; - } xbar_rule_t; - xbar_rule_t [NumRules-1:0] addr_map; - logic [AXI_ADDR_WIDTH-1:0] cluster_base_addr; - assign cluster_base_addr = ClusterBaseAddr; /* + (cluster_id_i << 22);*/ - assign addr_map = '{ - '{ // SoC low - start_addr: '0, - end_addr: cluster_base_addr, - idx: 0 - }, - '{ // TCDM - start_addr: cluster_base_addr, - end_addr: cluster_base_addr + TCDM_SIZE, - idx: 1 - }, - '{ // SoC high - start_addr: cluster_base_addr + TCDM_SIZE, - end_addr: '1, - idx: 0 - } - }; - localparam int unsigned NumMstPorts = 2; - localparam int unsigned NumSlvPorts = NUM_STREAMS; - - /* verilator lint_off WIDTHCONCAT */ - localparam axi_pkg::xbar_cfg_t XbarCfg = '{ - NoSlvPorts: NumSlvPorts, - NoMstPorts: NumMstPorts, - MaxMstTrans: NB_OUTSND_BURSTS, - MaxSlvTrans: NB_OUTSND_BURSTS, - FallThrough: 1'b0, - LatencyMode: axi_pkg::CUT_ALL_PORTS, - PipelineStages: 0, - AxiIdWidthSlvPorts: SlvIdxWidth, - AxiIdUsedSlvPorts: SlvIdxWidth, - UniqueIds: 1'b0, - AxiAddrWidth: AXI_ADDR_WIDTH, - AxiDataWidth: AXI_DATA_WIDTH, - NoAddrRules: NumRules - }; - /* verilator lint_on WIDTHCONCAT */ - - 
axi_xbar #( - .Cfg ( XbarCfg ), - .slv_aw_chan_t( slv_aw_chan_t ), - .mst_aw_chan_t( mst_aw_chan_t ), - .w_chan_t ( w_chan_t ), - .slv_b_chan_t ( slv_b_chan_t ), - .mst_b_chan_t ( mst_b_chan_t ), - .slv_ar_chan_t( slv_ar_chan_t ), - .mst_ar_chan_t( mst_ar_chan_t ), - .slv_r_chan_t ( slv_r_chan_t ), - .mst_r_chan_t ( mst_r_chan_t ), - .slv_req_t ( slv_req_t ), - .slv_resp_t ( slv_resp_t ), - .mst_req_t ( mst_req_t ), - .mst_resp_t ( mst_resp_t ), - .rule_t ( xbar_rule_t ) - ) i_dma_axi_xbar ( - .clk_i ( clk_i ), - .rst_ni ( rst_ni ), - .test_i ( test_mode_i ), - .slv_ports_req_i ( dma_req ), - .slv_ports_resp_o ( dma_rsp ), - .mst_ports_req_o ( { tcdm_req, soc_req } ), - .mst_ports_resp_i ( { tcdm_rsp, soc_rsp } ), - .addr_map_i ( addr_map ), - .en_default_mst_port_i ( '0 ), - .default_mst_port_i ( '0 ) - ); + assign tcdm_master[NB_TCDM_PORTS_PER_STRM*s+2].user = '0; + assign tcdm_master[NB_TCDM_PORTS_PER_STRM*s+3].user = '0; + assign tcdm_master[NB_TCDM_PORTS_PER_STRM*s+2].wen = !tcdm_master_we_2; + assign tcdm_master[NB_TCDM_PORTS_PER_STRM*s+3].wen = !tcdm_master_we_3; + + if (!MUX_READ) begin // if we don't mux the read, we have 6*NUM_BIDIR_STREAMS interfaces and the reorg + // interface goes straight to TCDM masters 5 and 4. 
+ mem_to_banks #( + .AddrWidth(AXI_ADDR_WIDTH), + .DataWidth(AXI_DATA_WIDTH), + .NumBanks (32'd2), + .HideStrb (1'b1), + .MaxTrans (32'd1), + .FifoDepth(32'd1) + ) i_mem_to_banks_reorg ( + .clk_i, + .rst_ni, + .req_i(obi_reorg_req_from_rrc[s].req), + .gnt_o(obi_reorg_rsp_to_rrc[s].gnt), + .addr_i(obi_reorg_req_from_rrc[s].a.addr), + .wdata_i(obi_reorg_req_from_rrc[s].a.wdata), + .strb_i(obi_reorg_req_from_rrc[s].a.be), + .atop_i('0), + .we_i(obi_reorg_req_from_rrc[s].a.we), + .rvalid_o(obi_reorg_rsp_to_rrc[s].rvalid), + .rdata_o(obi_reorg_rsp_to_rrc[s].r.rdata), + .bank_req_o({ + tcdm_master[NB_TCDM_PORTS_PER_STRM*s+5].req, tcdm_master[NB_TCDM_PORTS_PER_STRM*s+4].req + }), + .bank_gnt_i({ + tcdm_master[NB_TCDM_PORTS_PER_STRM*s+5].gnt, tcdm_master[NB_TCDM_PORTS_PER_STRM*s+4].gnt + }), + .bank_addr_o({ + tcdm_master[NB_TCDM_PORTS_PER_STRM*s+5].add, tcdm_master[NB_TCDM_PORTS_PER_STRM*s+4].add + }), + .bank_wdata_o({ + tcdm_master[NB_TCDM_PORTS_PER_STRM*s+5].data, tcdm_master[NB_TCDM_PORTS_PER_STRM*s+4].data + }), + .bank_strb_o({ + tcdm_master[NB_TCDM_PORTS_PER_STRM*s+5].be, tcdm_master[NB_TCDM_PORTS_PER_STRM*s+4].be + }), + .bank_atop_o( /* NOT CONNECTED */), + .bank_we_o({tcdm_master_we_5, tcdm_master_we_4}), + .bank_rvalid_i({ + tcdm_master[NB_TCDM_PORTS_PER_STRM*s+5].r_valid, + tcdm_master[NB_TCDM_PORTS_PER_STRM*s+4].r_valid + }), + .bank_rdata_i({ + tcdm_master[NB_TCDM_PORTS_PER_STRM*s+5].r_data, + tcdm_master[NB_TCDM_PORTS_PER_STRM*s+4].r_data + }) + ); + + assign tcdm_master[NB_TCDM_PORTS_PER_STRM*s+4].boffs = '0; + assign tcdm_master[NB_TCDM_PORTS_PER_STRM*s+4].lrdy = '0; + assign tcdm_master[NB_TCDM_PORTS_PER_STRM*s+4].user = '0; + assign tcdm_master[NB_TCDM_PORTS_PER_STRM*s+5].boffs = '0; + assign tcdm_master[NB_TCDM_PORTS_PER_STRM*s+5].lrdy = '0; + assign tcdm_master[NB_TCDM_PORTS_PER_STRM*s+5].user = '0; + assign tcdm_master[NB_TCDM_PORTS_PER_STRM*s+4].wen = !tcdm_master_we_4; + assign tcdm_master[NB_TCDM_PORTS_PER_STRM*s+5].wen = !tcdm_master_we_5; + + 
end + mem_to_banks #( + .AddrWidth(AXI_ADDR_WIDTH), + .DataWidth(AXI_DATA_WIDTH), + .NumBanks (32'd2), + .HideStrb (1'b1), + .MaxTrans (32'd1), + .FifoDepth(32'd1) + ) i_mem_to_banks_write ( + .clk_i, + .rst_ni, + .req_i(obi_write_req_from_rrc[s].req), + .gnt_o(obi_write_rsp_to_rrc[s].gnt), + .addr_i(obi_write_req_from_rrc[s].a.addr), + .wdata_i(obi_write_req_from_rrc[s].a.wdata), + .strb_i(obi_write_req_from_rrc[s].a.be), + .atop_i('0), + .we_i(obi_write_req_from_rrc[s].a.we), + .rvalid_o(obi_write_rsp_to_rrc[s].rvalid), + .rdata_o(obi_write_rsp_to_rrc[s].r.rdata), + .bank_req_o({ + tcdm_master[NB_TCDM_PORTS_PER_STRM*s+1].req, tcdm_master[NB_TCDM_PORTS_PER_STRM*s].req + }), + .bank_gnt_i({ + tcdm_master[NB_TCDM_PORTS_PER_STRM*s+1].gnt, tcdm_master[NB_TCDM_PORTS_PER_STRM*s].gnt + }), + .bank_addr_o({ + tcdm_master[NB_TCDM_PORTS_PER_STRM*s+1].add, tcdm_master[NB_TCDM_PORTS_PER_STRM*s].add + }), + .bank_wdata_o({ + tcdm_master[NB_TCDM_PORTS_PER_STRM*s+1].data, tcdm_master[NB_TCDM_PORTS_PER_STRM*s].data + }), + .bank_strb_o({ + tcdm_master[NB_TCDM_PORTS_PER_STRM*s+1].be, tcdm_master[NB_TCDM_PORTS_PER_STRM*s].be + }), + .bank_atop_o( /* NOT CONNECTED */), + .bank_we_o({tcdm_master_we_1, tcdm_master_we_0}), + .bank_rvalid_i({ + tcdm_master[NB_TCDM_PORTS_PER_STRM*s+1].r_valid, + tcdm_master[NB_TCDM_PORTS_PER_STRM*s].r_valid + }), + .bank_rdata_i({ + tcdm_master[NB_TCDM_PORTS_PER_STRM*s+1].r_data, tcdm_master[NB_TCDM_PORTS_PER_STRM*s].r_data + }) + ); - localparam int unsigned TcdmFifoDepth = 1; - `AXI_ASSIGN_REQ_STRUCT(tcdm_mem_req, tcdm_req) - - axi_to_mem_split #( - .axi_req_t ( mem_req_t ), - .axi_resp_t ( mst_resp_t ), - .AddrWidth ( ADDR_WIDTH ), - .AxiDataWidth ( AXI_DATA_WIDTH ), - .IdWidth ( MstIdxWidth ), - .MemDataWidth ( DATA_WIDTH ), - .BufDepth ( TcdmFifoDepth ), - .HideStrb ( 1'b1 ) - ) i_axi_to_mem ( - .clk_i, - .rst_ni, - .busy_o (), - .axi_req_i ( tcdm_mem_req ), - .axi_resp_o ( tcdm_rsp ), - .mem_req_o ( { tcdm_master[0].req, tcdm_master[1].req, - 
tcdm_master[2].req, tcdm_master[3].req } ), - .mem_gnt_i ( { tcdm_master[0].gnt, tcdm_master[1].gnt, - tcdm_master[2].gnt, tcdm_master[3].gnt } ), - .mem_addr_o ( { tcdm_master[0].add, tcdm_master[1].add, - tcdm_master[2].add, tcdm_master[3].add } ), - .mem_wdata_o ( { tcdm_master[0].data, tcdm_master[1].data, - tcdm_master[2].data, tcdm_master[3].data } ), - .mem_strb_o ( { tcdm_master[0].be, tcdm_master[1].be, - tcdm_master[2].be, tcdm_master[3].be } ), - .mem_atop_o ( ), - .mem_we_o ( { tcdm_master_we_0, tcdm_master_we_1, - tcdm_master_we_2, tcdm_master_we_3 } ), - .mem_rvalid_i ( { tcdm_master[0].r_valid, tcdm_master[1].r_valid, - tcdm_master[2].r_valid, tcdm_master[3].r_valid } ), - .mem_rdata_i ( { tcdm_master[0].r_data, tcdm_master[1].r_data, - tcdm_master[2].r_data, tcdm_master[3].r_data } ) - ); + assign tcdm_master[NB_TCDM_PORTS_PER_STRM*s+0].user = '0; + assign tcdm_master[NB_TCDM_PORTS_PER_STRM*s+1].user = '0; + assign tcdm_master[NB_TCDM_PORTS_PER_STRM*s+0].wen = !tcdm_master_we_0; + assign tcdm_master[NB_TCDM_PORTS_PER_STRM*s+1].wen = !tcdm_master_we_1; - // flip we polarity - assign tcdm_master[0].wen = !tcdm_master_we_0; - assign tcdm_master[1].wen = !tcdm_master_we_1; - assign tcdm_master[2].wen = !tcdm_master_we_2; - assign tcdm_master[3].wen = !tcdm_master_we_3; - - for (genvar ii=0; ii<4; ii++) begin : gen_tie_unused_tcdm_master - assign tcdm_master[ii].user = '0; - assign tcdm_master[ii].ecc = '0; - assign tcdm_master[ii].id = '0; - assign tcdm_master[ii].ereq = '0; - assign tcdm_master[ii].r_eready = '1; end - -endmodule : dmac_wrap +endmodule +`undef MY_MAX diff --git a/rtl/pulp_cluster.sv b/rtl/pulp_cluster.sv index 24403eb4..15270e58 100644 --- a/rtl/pulp_cluster.sv +++ b/rtl/pulp_cluster.sv @@ -726,9 +726,9 @@ dmac_wrap #( .TCDM_ADD_WIDTH ( TcdmAddrWidth ) `else .NB_PE_PORTS ( 2 ), - .NUM_STREAMS ( 4 ), - .TCDM_SIZE ( Cfg.TcdmSize ), - .ClusterBaseAddr ( Cfg.ClusterBaseAddr ) + .NUM_BIDIR_STREAMS ( 1 ), + .GLOBAL_QUEUE_DEPTH ( 2 ), + 
.MUX_READ ( 1'b1 ), `endif ) dmac_wrap_i ( .clk_i ( clk_i ), @@ -737,10 +737,13 @@ dmac_wrap #( .pe_ctrl_slave ( s_periph_dma_bus[1:0] ), .ctrl_slave ( s_core_dmactrl_bus ), .tcdm_master ( s_hci_dma ), - +`ifdef TARGET_MCHAN .ext_master_req_o ( s_dma_ext_bus_req ), .ext_master_resp_i ( s_dma_ext_bus_resp ), - +`else + .ext_master_req_o ( {s_dma_ext_bus_req} ), + .ext_master_resp_i ( {s_dma_ext_bus_resp} ), +`endif .term_event_o ( s_dma_event ), .term_irq_o ( s_dma_irq ), .term_event_pe_o ( {s_dma_fc_event, s_dma_cl_event} ), @@ -748,6 +751,7 @@ dmac_wrap #( .busy_o ( s_dmac_busy ) ); + //*************************************************** //**************CLUSTER PERIPHERALS****************** //*************************************************** From 4cfd84952befddb35bd676a1e1e037950640da52 Mon Sep 17 00:00:00 2001 From: Georg Rutishauser Date: Tue, 2 Jul 2024 16:50:26 +0200 Subject: [PATCH 05/67] hw,tb: Detach DMA from cluster bus, add wide port, test --- include/cluster_bus_defines.sv | 2 +- packages/pulp_cluster_package.sv | 14 ++- rtl/cluster_bus_wrap.sv | 14 +-- rtl/pulp_cluster.sv | 162 ++++++++++++++++++++++++++----- rtl/pulp_cluster_wrap.sv | 74 ++++++++++++-- tb/pulp_cluster_tb.sv | 134 ++++++++++++++++++++++--- 6 files changed, 343 insertions(+), 57 deletions(-) diff --git a/include/cluster_bus_defines.sv b/include/cluster_bus_defines.sv index 487e2629..928ae829 100644 --- a/include/cluster_bus_defines.sv +++ b/include/cluster_bus_defines.sv @@ -13,7 +13,7 @@ * */ -`define NB_SLAVE 4 +`define NB_SLAVE 3 `define NB_MASTER 3 `define NB_REGION 1 diff --git a/packages/pulp_cluster_package.sv b/packages/pulp_cluster_package.sv index 2f8dae70..b47e68c2 100644 --- a/packages/pulp_cluster_package.sv +++ b/packages/pulp_cluster_package.sv @@ -144,12 +144,16 @@ package pulp_cluster_package; byte_t AxiIdInWidth; // AXI ID width of crossbar manager ports byte_t AxiIdOutWidth; + // AXI ID width of wide external to cluster port + byte_t AxiIdOutWideWidth; // AXI address 
width byte_t AxiAddrWidth; - // AXI data width from external to cluster + // AXI data width from external to cluster (narrow) byte_t AxiDataInWidth; - // AXI data width from cluster to external + // AXI data width from cluster to external (narrow) byte_t AxiDataOutWidth; + // AXI data width from cluster to external (wide) + word_t AxiDataOutWideWidth; // AXI user width byte_t AxiUserWidth; // AXI maximum subordinate transaction per ID @@ -196,7 +200,7 @@ package pulp_cluster_package; localparam int unsigned SPER_ERROR_ID = 12; // -> unmapped, directed to error // The following parameters refer to the cluster AXI crossbar - localparam byte_t NumAxiSubordinatePorts = 4; + localparam byte_t NumAxiSubordinatePorts = 3; localparam byte_t NumAxiManagerPorts = 3; localparam byte_t AxiSubordinateIdwidth = 4; localparam byte_t AxiManagerIdwidth = AxiSubordinateIdwidth + $clog2(NumAxiSubordinatePorts); @@ -251,10 +255,12 @@ package pulp_cluster_package; NumAxiIn: NumAxiSubordinatePorts, NumAxiOut: NumAxiManagerPorts, AxiIdInWidth: AxiSubordinateIdwidth, - AxiIdOutWidth:AxiManagerIdwidth, + AxiIdOutWidth: AxiManagerIdwidth, + AxiIdOutWideWidth: 1, AxiAddrWidth: 48, AxiDataInWidth: 64, AxiDataOutWidth: 64, + AxiDataOutWideWidth: 128, AxiUserWidth: 10, AxiMaxInTrans: 64, AxiMaxOutTrans: 64, diff --git a/rtl/cluster_bus_wrap.sv b/rtl/cluster_bus_wrap.sv index 775e5622..dbf867d3 100644 --- a/rtl/cluster_bus_wrap.sv +++ b/rtl/cluster_bus_wrap.sv @@ -31,7 +31,6 @@ module cluster_bus_wrap parameter int unsigned AXI_ID_IN_WIDTH = 4 , parameter int unsigned AXI_ID_OUT_WIDTH = 6 , parameter int unsigned AXI_USER_WIDTH = 6 , - parameter int unsigned DMA_NB_OUTSND_BURSTS = 8 , parameter int unsigned TCDM_SIZE = 0, parameter logic [AXI_ADDR_WIDTH-1:0] BaseAddr = 'h10000000, parameter logic [AXI_ADDR_WIDTH-1:0] ClusterPeripheralsOffs = 'h00200000, @@ -59,8 +58,6 @@ module cluster_bus_wrap output slave_resp_t data_slave_resp_o, input slave_req_t instr_slave_req_i, output slave_resp_t 
instr_slave_resp_o, - input slave_req_t dma_slave_req_i, - output slave_resp_t dma_slave_resp_o, input slave_req_t ext_slave_req_i, output slave_resp_t ext_slave_resp_o, //INITIATOR @@ -92,10 +89,8 @@ module cluster_bus_wrap `AXI_ASSIGN_RESP_STRUCT(data_slave_resp_o, axi_slave_resps[0]) `AXI_ASSIGN_REQ_STRUCT(axi_slave_reqs[1], instr_slave_req_i) `AXI_ASSIGN_RESP_STRUCT(instr_slave_resp_o, axi_slave_resps[1]) - `AXI_ASSIGN_REQ_STRUCT(axi_slave_reqs[2], dma_slave_req_i) - `AXI_ASSIGN_RESP_STRUCT(dma_slave_resp_o, axi_slave_resps[2]) - `AXI_ASSIGN_REQ_STRUCT(axi_slave_reqs[3], ext_slave_req_i) - `AXI_ASSIGN_RESP_STRUCT(ext_slave_resp_o, axi_slave_resps[3]) + `AXI_ASSIGN_REQ_STRUCT(axi_slave_reqs[2], ext_slave_req_i) + `AXI_ASSIGN_RESP_STRUCT(ext_slave_resp_o, axi_slave_resps[2]) master_req_t [NB_MASTER-1:0] axi_master_reqs; master_resp_t [NB_MASTER-1:0] axi_master_resps; @@ -142,15 +137,14 @@ module cluster_bus_wrap end_addr: cluster_base_addr }; - localparam int unsigned MAX_TXNS_PER_SLV_PORT = (DMA_NB_OUTSND_BURSTS > NB_CORES) ? 
- DMA_NB_OUTSND_BURSTS : NB_CORES; + localparam int unsigned MAX_TXNS_PER_SLV_PORT = NB_CORES; localparam xbar_cfg_t AXI_XBAR_CFG = '{ NoSlvPorts: NB_SLAVE, NoMstPorts: NB_MASTER, MaxMstTrans: MAX_TXNS_PER_SLV_PORT, //The TCDM ports do not support //outstanding transactiions anyways - MaxSlvTrans: DMA_NB_OUTSND_BURSTS + NB_CORES, //Allow up to 4 in-flight transactions + MaxSlvTrans: NB_CORES, //Allow up to 4 in-flight transactions //per slave port FallThrough: 1'b0, //Use the reccomended default config LatencyMode: axi_pkg::NO_LATENCY, // CUT_ALL_AX | axi_pkg::DemuxW, diff --git a/rtl/pulp_cluster.sv b/rtl/pulp_cluster.sv index 15270e58..42894248 100644 --- a/rtl/pulp_cluster.sv +++ b/rtl/pulp_cluster.sv @@ -35,7 +35,7 @@ module pulp_cluster localparam int unsigned MaxUniqId = 1, localparam int unsigned AxiIdInWidth = pulp_cluster_package::AxiSubordinateIdwidth, localparam int unsigned AxiIdOutWidth = pulp_cluster_package::AxiManagerIdwidth, - // CDC AXI parameters (external to cluster) + // CDC AXI parameters (external to cluster, narrow) localparam int unsigned AwInWidth = axi_pkg::aw_width(Cfg.AxiAddrWidth, Cfg.AxiIdInWidth, Cfg.AxiUserWidth), @@ -54,7 +54,7 @@ module pulp_cluster localparam int unsigned AsyncInBDataWidth = (2**Cfg.AxiCdcLogDepth)*BInWidth, localparam int unsigned AsyncInArDatawidth = (2**Cfg.AxiCdcLogDepth)*ArInWidth, localparam int unsigned AsyncInRDataWidth = (2**Cfg.AxiCdcLogDepth)*RInWidth, - // CDC AXI parameters (cluster to external) + // CDC AXI parameters (cluster to external, narrow) localparam int unsigned AwOutWidth = axi_pkg::aw_width(Cfg.AxiAddrWidth, Cfg.AxiIdOutWidth, Cfg.AxiUserWidth), @@ -73,6 +73,25 @@ module pulp_cluster localparam int unsigned AsyncOutBDataWidth = (2**Cfg.AxiCdcLogDepth)*BOutWidth, localparam int unsigned AsyncOutArDataWidth = (2**Cfg.AxiCdcLogDepth)*ArOutWidth, localparam int unsigned AsyncOutRDataWidth = (2**Cfg.AxiCdcLogDepth)*ROutWidth, + // CDC AXI parameters (cluster to external, wide) + localparam int 
unsigned AwOutWideWidth = axi_pkg::aw_width(Cfg.AxiAddrWidth, + Cfg.AxiIdOutWideWidth, + Cfg.AxiUserWidth), + localparam int unsigned WOutWideWidth = axi_pkg::w_width(Cfg.AxiDataOutWideWidth, + Cfg.AxiUserWidth), + localparam int unsigned BOutWideWidth = axi_pkg::b_width(Cfg.AxiIdOutWideWidth, + Cfg.AxiUserWidth), + localparam int unsigned ArOutWideWidth = axi_pkg::ar_width(Cfg.AxiAddrWidth, + Cfg.AxiIdOutWideWidth, + Cfg.AxiUserWidth), + localparam int unsigned ROutWideWidth = axi_pkg::r_width(Cfg.AxiDataOutWideWidth, + Cfg.AxiIdOutWideWidth, + Cfg.AxiUserWidth), + localparam int unsigned AsyncOutAwWideDataWidth = (2**Cfg.AxiCdcLogDepth)*AwOutWideWidth, + localparam int unsigned AsyncOutWWideDataWidth = (2**Cfg.AxiCdcLogDepth)*WOutWideWidth, + localparam int unsigned AsyncOutBWideDataWidth = (2**Cfg.AxiCdcLogDepth)*BOutWideWidth, + localparam int unsigned AsyncOutArWideDataWidth = (2**Cfg.AxiCdcLogDepth)*ArOutWideWidth, + localparam int unsigned AsyncOutRWideDataWidth = (2**Cfg.AxiCdcLogDepth)*ROutWideWidth, // Internal bus parameters // TCDM data bus width (never changes) localparam int unsigned DataWidth = 32, @@ -130,6 +149,7 @@ module pulp_cluster input logic axi_isolate_i, output logic axi_isolated_o, + output logic axi_isolated_wide_o, input logic dma_pe_evt_ack_i, output logic dma_pe_evt_valid_o, @@ -149,7 +169,7 @@ module pulp_cluster input logic [AsyncEventDataWidth-1:0] async_cluster_events_data_i, - // AXI4 SLAVE + // AXI4 SLAVE Narrow //*************************************** // WRITE ADDRESS CHANNEL input logic [Cfg.AxiCdcLogDepth:0] async_data_slave_aw_wptr_i, @@ -175,7 +195,7 @@ module pulp_cluster output logic [Cfg.AxiCdcLogDepth:0] async_data_slave_b_wptr_o, output logic [AsyncInBDataWidth-1:0] async_data_slave_b_data_o, input logic [Cfg.AxiCdcLogDepth:0] async_data_slave_b_rptr_i, - // AXI4 MASTER + // AXI4 MASTER Narrow //*************************************** // WRITE ADDRESS CHANNEL output logic [Cfg.AxiCdcLogDepth:0] 
async_data_master_aw_wptr_o, @@ -200,7 +220,33 @@ module pulp_cluster // WRITE RESPONSE CHANNEL input logic [Cfg.AxiCdcLogDepth:0] async_data_master_b_wptr_i, input logic [AsyncOutBDataWidth-1:0] async_data_master_b_data_i, - output logic [Cfg.AxiCdcLogDepth:0] async_data_master_b_rptr_o + output logic [Cfg.AxiCdcLogDepth:0] async_data_master_b_rptr_o, + // AXI4 MASTER Wide + //************************************** + // WRITE ADDRESS CHANNEL + output logic [Cfg.AxiCdcLogDepth:0] async_wide_master_aw_wptr_o, + output logic [AsyncOutAwWideDataWidth-1:0] async_wide_master_aw_data_o, + input logic [Cfg.AxiCdcLogDepth:0] async_wide_master_aw_rptr_i, + + // READ ADDRESS CHANNEL + output logic [Cfg.AxiCdcLogDepth:0] async_wide_master_ar_wptr_o, + output logic [AsyncOutArWideDataWidth-1:0] async_wide_master_ar_data_o, + input logic [Cfg.AxiCdcLogDepth:0] async_wide_master_ar_rptr_i, + + // WRITE DATA CHANNEL + output logic [Cfg.AxiCdcLogDepth:0] async_wide_master_w_wptr_o, + output logic [AsyncOutWWideDataWidth-1:0] async_wide_master_w_data_o, + input logic [Cfg.AxiCdcLogDepth:0] async_wide_master_w_rptr_i, + + // READ DATA CHANNEL + input logic [Cfg.AxiCdcLogDepth:0] async_wide_master_r_wptr_i, + input logic [AsyncOutRWideDataWidth-1:0] async_wide_master_r_data_i, + output logic [Cfg.AxiCdcLogDepth:0] async_wide_master_r_rptr_o, + + // WRITE RESPONSE CHANNEL + input logic [Cfg.AxiCdcLogDepth:0] async_wide_master_b_wptr_i, + input logic [AsyncOutBWideDataWidth-1:0] async_wide_master_b_data_i, + output logic [Cfg.AxiCdcLogDepth:0] async_wide_master_b_rptr_o ); //Ensure that the input AXI ID width is big enough to accomodate the accomodate the IDs of internal wiring @@ -473,6 +519,16 @@ hci_core_intf #( `AXI_TYPEDEF_REQ_T(c2s_out_int_req_t,c2s_out_int_aw_chan_t,c2s_out_int_w_chan_t,c2s_out_int_ar_chan_t) `AXI_TYPEDEF_RESP_T(c2s_out_int_resp_t,c2s_out_int_b_chan_t,c2s_out_int_r_chan_t) + // CLUSTER TO SOC Wide + 
`AXI_TYPEDEF_AW_CHAN_T(c2s_wide_aw_chan_t,logic[Cfg.AxiAddrWidth-1:0],logic[Cfg.AxiIdOutWideWidth-1:0],logic[Cfg.AxiUserWidth-1:0]) + `AXI_TYPEDEF_W_CHAN_T(c2s_wide_w_chan_t,logic[Cfg.AxiDataOutWideWidth-1:0],logic[Cfg.AxiDataOutWideWidth/8-1:0],logic[Cfg.AxiUserWidth-1:0]) + `AXI_TYPEDEF_B_CHAN_T(c2s_wide_b_chan_t,logic[Cfg.AxiIdOutWideWidth-1:0],logic[Cfg.AxiUserWidth-1:0]) + `AXI_TYPEDEF_AR_CHAN_T(c2s_wide_ar_chan_t,logic[Cfg.AxiAddrWidth-1:0],logic[Cfg.AxiIdOutWideWidth-1:0],logic[Cfg.AxiUserWidth-1:0]) + `AXI_TYPEDEF_R_CHAN_T(c2s_wide_r_chan_t,logic[Cfg.AxiDataOutWideWidth-1:0],logic[Cfg.AxiIdOutWideWidth-1:0],logic[Cfg.AxiUserWidth-1:0]) + + `AXI_TYPEDEF_REQ_T(c2s_wide_req_t, c2s_wide_aw_chan_t, c2s_wide_w_chan_t, c2s_wide_ar_chan_t) + `AXI_TYPEDEF_RESP_T(c2s_wide_resp_t, c2s_wide_b_chan_t, c2s_wide_r_chan_t) + typedef s2c_in_int_aw_chan_t c2s_in_int_aw_chan_t; typedef c2s_out_int_w_chan_t c2s_in_int_w_chan_t; typedef s2c_in_int_b_chan_t c2s_in_int_b_chan_t; @@ -497,15 +553,14 @@ hci_core_intf #( c2s_in_int_req_t s_core_instr_bus_req; c2s_in_int_resp_t s_core_instr_bus_resp; + c2s_wide_req_t s_dma_master_req; + c2s_wide_resp_t s_dma_master_resp; + // core per2axi -> ext c2s_in_int_req_t s_core_ext_bus_req; c2s_in_int_resp_t s_core_ext_bus_resp; - // DMA -> ext - c2s_in_int_req_t s_dma_ext_bus_req; - c2s_in_int_resp_t s_dma_ext_bus_resp; - // ext -> axi2mem c2s_out_int_req_t s_ext_tcdm_bus_req; c2s_out_int_resp_t s_ext_tcdm_bus_resp; @@ -533,7 +588,6 @@ cluster_bus_wrap #( .NB_MASTER ( Cfg.NumAxiOut ), .NB_SLAVE ( Cfg.NumAxiIn ), .NB_CORES ( Cfg.NumCores ), - .DMA_NB_OUTSND_BURSTS ( Cfg.DmaNumOutstandingBursts ), .TCDM_SIZE ( Cfg.TcdmSize ), .AXI_ADDR_WIDTH ( Cfg.AxiAddrWidth ), .AXI_DATA_WIDTH ( Cfg.AxiDataOutWidth ), @@ -565,8 +619,6 @@ cluster_bus_wrap #( .data_slave_resp_o ( s_core_ext_bus_resp ), .instr_slave_req_i ( s_core_instr_bus_req ), .instr_slave_resp_o ( s_core_instr_bus_resp ), - .dma_slave_req_i ( s_dma_ext_bus_req ), - .dma_slave_resp_o ( 
s_dma_ext_bus_resp ), .ext_slave_req_i ( s_data_slave_64_req ), .ext_slave_resp_o ( s_data_slave_64_resp ), .tcdm_master_req_o ( s_ext_tcdm_bus_req ), @@ -711,15 +763,15 @@ dmac_wrap #( .NB_CORES ( Cfg.NumCores ), .NB_OUTSND_BURSTS ( Cfg.DmaNumOutstandingBursts ), .AXI_ADDR_WIDTH ( Cfg.AxiAddrWidth ), - .AXI_DATA_WIDTH ( Cfg.AxiDataOutWidth ), - .AXI_ID_WIDTH ( AxiIdInWidth ), + .AXI_DATA_WIDTH ( Cfg.AxiDataOutWideWidth ), + .AXI_ID_WIDTH ( Cfg.AxiIdOutWideWidth ), .AXI_USER_WIDTH ( Cfg.AxiUserWidth ), .PE_ID_WIDTH ( Cfg.NumCores + 1 ), .DATA_WIDTH ( DataWidth ), .ADDR_WIDTH ( AddrWidth ), .BE_WIDTH ( BeWidth ), - .axi_req_t ( c2s_in_int_req_t ), - .axi_resp_t ( c2s_in_int_resp_t ), + .axi_req_t ( c2s_wide_req_t ), + .axi_resp_t ( c2s_wide_resp_t ), `ifdef TARGET_MCHAN .NB_CTRLS ( Cfg.NumCores + 2 ), .MCHAN_BURST_LENGTH ( Cfg.DmaBurstLength ), @@ -738,11 +790,11 @@ dmac_wrap #( .ctrl_slave ( s_core_dmactrl_bus ), .tcdm_master ( s_hci_dma ), `ifdef TARGET_MCHAN - .ext_master_req_o ( s_dma_ext_bus_req ), - .ext_master_resp_i ( s_dma_ext_bus_resp ), + .ext_master_req_o ( s_dma_master_req ), + .ext_master_resp_i ( s_dma_master_resp ), `else - .ext_master_req_o ( {s_dma_ext_bus_req} ), - .ext_master_resp_i ( {s_dma_ext_bus_resp} ), + .ext_master_req_o ( {s_dma_master_req} ), + .ext_master_resp_i ( {s_dma_master_resp} ), `endif .term_event_o ( s_dma_event ), .term_irq_o ( s_dma_irq ), @@ -1507,6 +1559,7 @@ tcdm_banks_wrap #( //******************************************************** //**************** AXI REGISTER SLICES ******************* //******************************************************** + // CLUSTER TO SOC `AXI_TYPEDEF_AW_CHAN_T(c2s_aw_chan_t,logic[Cfg.AxiAddrWidth-1:0],logic[Cfg.AxiIdOutWidth-1:0],logic[Cfg.AxiUserWidth-1:0]) `AXI_TYPEDEF_W_CHAN_T(c2s_w_chan_t,logic[Cfg.AxiDataOutWidth-1:0],logic[Cfg.AxiDataOutWidth/8-1:0],logic[Cfg.AxiUserWidth-1:0]) @@ -1517,9 +1570,6 @@ tcdm_banks_wrap #( 
`AXI_TYPEDEF_REQ_T(c2s_req_t,c2s_aw_chan_t,c2s_w_chan_t,c2s_ar_chan_t) `AXI_TYPEDEF_RESP_T(c2s_resp_t,c2s_b_chan_t,c2s_r_chan_t) -c2s_req_t src_req, isolate_src_req ; -c2s_resp_t src_resp, isolate_src_resp; - sync #( .STAGES ( Cfg.SyncStages ), .ResetValue ( 1'b1 ) @@ -1570,6 +1620,10 @@ sync #( .serial_o ( mbox_irq_synch ) ); +// Cluster to Soc (narrow) +c2s_req_t src_req, isolate_src_req; +c2s_resp_t src_resp, isolate_src_resp; + `AXI_TYPEDEF_AW_CHAN_T(c2s_remap_aw_chan_t,logic[Cfg.AxiAddrWidth-1:0],logic[AxiIdOutWidth-1:0],logic[Cfg.AxiUserWidth-1:0]) `AXI_TYPEDEF_W_CHAN_T(c2s_remap_w_chan_t,logic[Cfg.AxiDataOutWidth-1:0],logic[Cfg.AxiDataOutWidth/8-1:0],logic[Cfg.AxiUserWidth-1:0]) `AXI_TYPEDEF_B_CHAN_T(c2s_remap_b_chan_t,logic[AxiIdOutWidth-1:0],logic[Cfg.AxiUserWidth-1:0]) @@ -1661,6 +1715,67 @@ axi_cdc_src #( .async_data_master_r_data_i ( async_data_master_r_data_i ) ); +// Cluster to Soc (wide) +c2s_wide_req_t src_wide_req, isolate_src_wide_req; +c2s_wide_resp_t src_wide_resp, isolate_src_wide_resp; + +assign isolate_src_wide_req = s_dma_master_req; +assign s_dma_master_resp = isolate_src_wide_resp; + +axi_isolate #( + .NumPending ( 8 ), + .TerminateTransaction ( 1 ), + .AtopSupport ( 1 ), + .AxiAddrWidth ( Cfg.AxiAddrWidth ), + .AxiDataWidth ( Cfg.AxiDataOutWideWidth ), + .AxiIdWidth ( Cfg.AxiIdOutWideWidth ), + .AxiUserWidth ( Cfg.AxiUserWidth ), + .axi_req_t ( c2s_wide_req_t ), + .axi_resp_t ( c2s_wide_resp_t ) +) i_axi_wide_master_isolate ( + .clk_i ( clk_i ), + .rst_ni ( rst_ni ), + .slv_req_i ( isolate_src_wide_req ), + .slv_resp_o ( isolate_src_wide_resp ), + .mst_req_o ( src_wide_req ), + .mst_resp_i ( src_wide_resp ), + .isolate_i ( axi_isolate_synch ), + .isolated_o ( axi_isolated_wide_o ) +); + +axi_cdc_src #( + .aw_chan_t ( c2s_wide_aw_chan_t ), + .w_chan_t ( c2s_wide_w_chan_t ), + .b_chan_t ( c2s_wide_b_chan_t ), + .r_chan_t ( c2s_wide_r_chan_t ), + .ar_chan_t ( c2s_wide_ar_chan_t ), + .axi_req_t ( c2s_wide_req_t ), + .axi_resp_t ( 
c2s_wide_resp_t ), + .LogDepth ( Cfg.AxiCdcLogDepth ), + .SyncStages ( Cfg.AxiCdcSyncStages ) +) axi_wide_master_cdc_i ( + .src_rst_ni ( pwr_on_rst_ni ), + .src_clk_i ( clk_i ), + .src_req_i ( src_wide_req ), + .src_resp_o ( src_wide_resp ), + .async_data_master_aw_wptr_o ( async_wide_master_aw_wptr_o ), + .async_data_master_aw_rptr_i ( async_wide_master_aw_rptr_i ), + .async_data_master_aw_data_o ( async_wide_master_aw_data_o ), + .async_data_master_w_wptr_o ( async_wide_master_w_wptr_o ), + .async_data_master_w_rptr_i ( async_wide_master_w_rptr_i ), + .async_data_master_w_data_o ( async_wide_master_w_data_o ), + .async_data_master_ar_wptr_o ( async_wide_master_ar_wptr_o ), + .async_data_master_ar_rptr_i ( async_wide_master_ar_rptr_i ), + .async_data_master_ar_data_o ( async_wide_master_ar_data_o ), + .async_data_master_b_wptr_i ( async_wide_master_b_wptr_i ), + .async_data_master_b_rptr_o ( async_wide_master_b_rptr_o ), + .async_data_master_b_data_i ( async_wide_master_b_data_i ), + .async_data_master_r_wptr_i ( async_wide_master_r_wptr_i ), + .async_data_master_r_rptr_o ( async_wide_master_r_rptr_o ), + .async_data_master_r_data_i ( async_wide_master_r_data_i ) +); + + // SOC TO CLUSTER `AXI_TYPEDEF_AW_CHAN_T(s2c_aw_chan_t,logic[Cfg.AxiAddrWidth-1:0],logic[Cfg.AxiIdInWidth-1:0],logic[Cfg.AxiUserWidth-1:0]) `AXI_TYPEDEF_W_CHAN_T(s2c_w_chan_t,logic[Cfg.AxiDataInWidth-1:0],logic[Cfg.AxiDataInWidth/8-1:0],logic[Cfg.AxiUserWidth-1:0]) @@ -1671,6 +1786,7 @@ axi_cdc_src #( `AXI_TYPEDEF_REQ_T(s2c_req_t,s2c_aw_chan_t,s2c_w_chan_t,s2c_ar_chan_t) `AXI_TYPEDEF_RESP_T(s2c_resp_t,s2c_b_chan_t,s2c_r_chan_t) +// Soc to Cluster (narrow) s2c_req_t dst_req; s2c_resp_t dst_resp; diff --git a/rtl/pulp_cluster_wrap.sv b/rtl/pulp_cluster_wrap.sv index 25d1e700..dffe70d1 100644 --- a/rtl/pulp_cluster_wrap.sv +++ b/rtl/pulp_cluster_wrap.sv @@ -19,7 +19,7 @@ package pulp_cluster_wrap_package; localparam int unsigned AxiIdInWidth = pulp_cluster_package::AxiSubordinateIdwidth; localparam 
int unsigned AxiIdOutWidth = pulp_cluster_package::AxiManagerIdwidth; - // CDC AXI parameters (external to cluster) + // CDC AXI parameters (external to cluster, narrow) localparam int unsigned AwInWidth = axi_pkg::aw_width(Cfg.AxiAddrWidth, Cfg.AxiIdInWidth, Cfg.AxiUserWidth); @@ -38,7 +38,7 @@ package pulp_cluster_wrap_package; localparam int unsigned AsyncInBDataWidth = (2**Cfg.AxiCdcLogDepth)*BInWidth; localparam int unsigned AsyncInArDatawidth = (2**Cfg.AxiCdcLogDepth)*ArInWidth; localparam int unsigned AsyncInRDataWidth = (2**Cfg.AxiCdcLogDepth)*RInWidth; - // CDC AXI parameters (cluster to external) + // CDC AXI parameters (cluster to external, narrow) localparam int unsigned AwOutWidth = axi_pkg::aw_width(Cfg.AxiAddrWidth, Cfg.AxiIdOutWidth, Cfg.AxiUserWidth); @@ -58,6 +58,25 @@ package pulp_cluster_wrap_package; localparam int unsigned AsyncOutArDataWidth = (2**Cfg.AxiCdcLogDepth)*ArOutWidth; localparam int unsigned AsyncOutRDataWidth = (2**Cfg.AxiCdcLogDepth)*ROutWidth; localparam int unsigned AsyncEventDataWidth = (2**Cfg.AxiCdcLogDepth)*EventWidth; + // CDC AXI parameters (cluster to external, wide) + localparam int unsigned AwOutWideWidth = axi_pkg::aw_width(Cfg.AxiAddrWidth, + Cfg.AxiIdOutWideWidth, + Cfg.AxiUserWidth); + localparam int unsigned WOutWideWidth = axi_pkg::w_width(Cfg.AxiDataOutWideWidth, + Cfg.AxiUserWidth); + localparam int unsigned BOutWideWidth = axi_pkg::b_width(Cfg.AxiIdOutWideWidth, + Cfg.AxiUserWidth); + localparam int unsigned ArOutWideWidth = axi_pkg::ar_width(Cfg.AxiAddrWidth, + Cfg.AxiIdOutWideWidth, + Cfg.AxiUserWidth); + localparam int unsigned ROutWideWidth = axi_pkg::r_width(Cfg.AxiDataOutWideWidth, + Cfg.AxiIdOutWideWidth, + Cfg.AxiUserWidth); + localparam int unsigned AsyncOutAwWideDataWidth = (2**Cfg.AxiCdcLogDepth)*AwOutWideWidth; + localparam int unsigned AsyncOutWWideDataWidth = (2**Cfg.AxiCdcLogDepth)*WOutWideWidth; + localparam int unsigned AsyncOutBWideDataWidth = (2**Cfg.AxiCdcLogDepth)*BOutWideWidth; + 
localparam int unsigned AsyncOutArWideDataWidth = (2**Cfg.AxiCdcLogDepth)*ArOutWideWidth; + localparam int unsigned AsyncOutRWideDataWidth = (2**Cfg.AxiCdcLogDepth)*ROutWideWidth; endpackage module pulp_cluster_wrap ( @@ -75,6 +94,7 @@ module pulp_cluster_wrap ( output logic busy_o, input logic axi_isolate_i, output logic axi_isolated_o, + output logic axi_isolated_wide_o, input logic dma_pe_evt_ack_i, output logic dma_pe_evt_valid_o, input logic dma_pe_irq_ack_i, @@ -86,7 +106,7 @@ module pulp_cluster_wrap ( input logic [pulp_cluster_wrap_package::Cfg.AxiCdcLogDepth:0] async_cluster_events_wptr_i, output logic [pulp_cluster_wrap_package::Cfg.AxiCdcLogDepth:0] async_cluster_events_rptr_o, input logic [pulp_cluster_wrap_package::AsyncEventDataWidth-1:0] async_cluster_events_data_i, - // AXI4 SLAVE + // AXI4 SLAVE Narrow //*************************************** // WRITE ADDRESS CHANNEL input logic [pulp_cluster_wrap_package::Cfg.AxiCdcLogDepth:0] async_data_slave_aw_wptr_i, @@ -108,7 +128,7 @@ module pulp_cluster_wrap ( output logic [pulp_cluster_wrap_package::Cfg.AxiCdcLogDepth:0] async_data_slave_b_wptr_o, output logic [pulp_cluster_wrap_package::AsyncInBDataWidth-1:0] async_data_slave_b_data_o, input logic [pulp_cluster_wrap_package::Cfg.AxiCdcLogDepth:0] async_data_slave_b_rptr_i, - // AXI4 MASTER + // AXI4 MASTER Narrow //*************************************** // WRITE ADDRESS CHANNEL output logic [pulp_cluster_wrap_package::Cfg.AxiCdcLogDepth:0] async_data_master_aw_wptr_o, @@ -129,7 +149,33 @@ module pulp_cluster_wrap ( // WRITE RESPONSE CHANNEL input logic [pulp_cluster_wrap_package::Cfg.AxiCdcLogDepth:0] async_data_master_b_wptr_i, input logic [pulp_cluster_wrap_package::AsyncOutBDataWidth-1:0] async_data_master_b_data_i, - output logic [pulp_cluster_wrap_package::Cfg.AxiCdcLogDepth:0] async_data_master_b_rptr_o + output logic [pulp_cluster_wrap_package::Cfg.AxiCdcLogDepth:0] async_data_master_b_rptr_o, + // AXI4 MASTER Wide + 
//************************************** + // WRITE ADDRESS CHANNEL + output logic [pulp_cluster_wrap_package::Cfg.AxiCdcLogDepth:0] async_wide_master_aw_wptr_o, + output logic [pulp_cluster_wrap_package::AsyncOutAwWideDataWidth-1:0] async_wide_master_aw_data_o, + input logic [pulp_cluster_wrap_package::Cfg.AxiCdcLogDepth:0] async_wide_master_aw_rptr_i, + + // READ ADDRESS CHANNEL + output logic [pulp_cluster_wrap_package::Cfg.AxiCdcLogDepth:0] async_wide_master_ar_wptr_o, + output logic [pulp_cluster_wrap_package::AsyncOutArWideDataWidth-1:0] async_wide_master_ar_data_o, + input logic [pulp_cluster_wrap_package::Cfg.AxiCdcLogDepth:0] async_wide_master_ar_rptr_i, + + // WRITE DATA CHANNEL + output logic [pulp_cluster_wrap_package::Cfg.AxiCdcLogDepth:0] async_wide_master_w_wptr_o, + output logic [pulp_cluster_wrap_package::AsyncOutWWideDataWidth-1:0] async_wide_master_w_data_o, + input logic [pulp_cluster_wrap_package::Cfg.AxiCdcLogDepth:0] async_wide_master_w_rptr_i, + + // READ DATA CHANNEL + input logic [pulp_cluster_wrap_package::Cfg.AxiCdcLogDepth:0] async_wide_master_r_wptr_i, + input logic [pulp_cluster_wrap_package::AsyncOutRWideDataWidth-1:0] async_wide_master_r_data_i, + output logic [pulp_cluster_wrap_package::Cfg.AxiCdcLogDepth:0] async_wide_master_r_rptr_o, + + // WRITE RESPONSE CHANNEL + input logic [pulp_cluster_wrap_package::Cfg.AxiCdcLogDepth:0] async_wide_master_b_wptr_i, + input logic [pulp_cluster_wrap_package::AsyncOutBWideDataWidth-1:0] async_wide_master_b_data_i, + output logic [pulp_cluster_wrap_package::Cfg.AxiCdcLogDepth:0] async_wide_master_b_rptr_o ); pulp_cluster #( .Cfg(pulp_cluster_wrap_package::Cfg) ) pulp_cluster_i ( @@ -139,6 +185,7 @@ module pulp_cluster_wrap ( .ref_clk_i, .axi_isolate_i ( '0 ), .axi_isolated_o, + .axi_isolated_wide_o, .pmu_mem_pwdn_i ( 1'b0 ), .base_addr_i, .dma_pe_evt_ack_i ( '1 ), @@ -189,6 +236,21 @@ module pulp_cluster_wrap ( .async_data_slave_r_data_o, .async_data_slave_b_wptr_o, .async_data_slave_b_rptr_i, - 
.async_data_slave_b_data_o + .async_data_slave_b_data_o, + .async_wide_master_aw_wptr_o, + .async_wide_master_aw_data_o, + .async_wide_master_aw_rptr_i, + .async_wide_master_ar_wptr_o, + .async_wide_master_ar_data_o, + .async_wide_master_ar_rptr_i, + .async_wide_master_w_wptr_o, + .async_wide_master_w_data_o, + .async_wide_master_w_rptr_i, + .async_wide_master_r_wptr_i, + .async_wide_master_r_data_i, + .async_wide_master_r_rptr_o, + .async_wide_master_b_wptr_i, + .async_wide_master_b_data_i, + .async_wide_master_b_rptr_o ); endmodule diff --git a/tb/pulp_cluster_tb.sv b/tb/pulp_cluster_tb.sv index 237535d7..3f9438f8 100644 --- a/tb/pulp_cluster_tb.sv +++ b/tb/pulp_cluster_tb.sv @@ -49,13 +49,16 @@ module pulp_cluster_tb; localparam AxiAw = 32; localparam AxiDw = 64; localparam AxiIw = 6; - localparam NMst = 2; + localparam NMst = 3; localparam NSlv = 3; localparam AxiIwMst = AxiIw + $clog2(NMst); localparam AxiWideBeWidth = AxiDw/8; localparam AxiWideByteOffset = $clog2(AxiWideBeWidth); localparam AxiUw = 10; + localparam DmaAxiDw = 64; + localparam DmaAxiIw = 1; + localparam bit[AxiAw-1:0] ClustBase = 'h10000000; localparam bit[AxiAw-1:0] ClustPeriphOffs = 'h00200000; localparam bit[AxiAw-1:0] ClustExtOffs = 'h00400000; @@ -73,23 +76,42 @@ module pulp_cluster_tb; typedef logic [AxiIw-1:0] axi_id_t; typedef logic [AxiIwMst-1:0] axi_m_id_t; + typedef logic [DmaAxiDw-1:0] dma_axi_data_t; + typedef logic [DmaAxiDw/8-1:0] dma_axi_strb_t; + typedef logic [DmaAxiIw-1:0] dma_axi_id_t; + + // Narrow AXI port type `AXI_TYPEDEF_W_CHAN_T(w_chan_t, axi_data_t, axi_strb_t, axi_user_t) `AXI_TYPEDEF_AW_CHAN_T(aw_chan_t, axi_addr_t, axi_id_t, axi_user_t) `AXI_TYPEDEF_B_CHAN_T(b_chan_t, axi_id_t, axi_user_t) `AXI_TYPEDEF_AR_CHAN_T(ar_chan_t, axi_addr_t, axi_id_t, axi_user_t) `AXI_TYPEDEF_R_CHAN_T(r_chan_t, axi_data_t, axi_id_t, axi_user_t) + `AXI_TYPEDEF_REQ_T(axi_req_t, aw_chan_t, w_chan_t, ar_chan_t) `AXI_TYPEDEF_RESP_T(axi_resp_t, b_chan_t, r_chan_t) + // Wide AXI port type + 
`AXI_TYPEDEF_W_CHAN_T(dma_w_chan_t, dma_axi_data_t, dma_axi_strb_t, axi_user_t) + `AXI_TYPEDEF_AW_CHAN_T(dma_aw_chan_t, axi_addr_t, dma_axi_id_t, axi_user_t) + `AXI_TYPEDEF_B_CHAN_T(dma_b_chan_t, dma_axi_id_t, axi_user_t) + `AXI_TYPEDEF_AR_CHAN_T(dma_ar_chan_t, axi_addr_t, dma_axi_id_t, axi_user_t) + `AXI_TYPEDEF_R_CHAN_T(dma_r_chan_t, dma_axi_data_t, dma_axi_id_t, axi_user_t) + + `AXI_TYPEDEF_REQ_T(dma_axi_req_t, aw_chan_t, dma_w_chan_t, ar_chan_t) + `AXI_TYPEDEF_RESP_T(dma_axi_resp_t, b_chan_t, dma_r_chan_t) + + // Memory-side AXI port type `AXI_TYPEDEF_AW_CHAN_T(aw_m_chan_t, axi_addr_t, axi_m_id_t, axi_user_t) `AXI_TYPEDEF_B_CHAN_T(b_m_chan_t, axi_m_id_t, axi_user_t) `AXI_TYPEDEF_AR_CHAN_T(ar_m_chan_t, axi_addr_t, axi_m_id_t, axi_user_t) `AXI_TYPEDEF_R_CHAN_T(r_m_chan_t, axi_data_t, axi_m_id_t, axi_user_t) + `AXI_TYPEDEF_REQ_T(axi_m_req_t, aw_m_chan_t, w_chan_t, ar_m_chan_t) `AXI_TYPEDEF_RESP_T(axi_m_resp_t, b_m_chan_t, r_m_chan_t) typedef logic [AxiAw-1:0] addr_t; typedef logic [AxiDw-1:0] data_t; + typedef logic [DmaAxiDw-1:0] dma_data_t; data_t memory [bit [31:0]]; int sections [bit [31:0]]; @@ -112,13 +134,27 @@ module pulp_cluster_tb; .AXI_DATA_WIDTH( AxiDw ), .AXI_ID_WIDTH ( AxiIw ), .AXI_USER_WIDTH( AxiUw ) - ) axi_slave[NMst-1:0](); + ) axi_slave[NMst-1:0](); + + AXI_BUS #( + .AXI_ADDR_WIDTH( AxiAw ), + .AXI_DATA_WIDTH( DmaAxiDw ), + .AXI_ID_WIDTH ( DmaAxiIw ), + .AXI_USER_WIDTH( AxiUw ) + ) dma_slave(); + + AXI_BUS #( + .AXI_ADDR_WIDTH( AxiAw ), + .AXI_DATA_WIDTH( DmaAxiDw ), + .AXI_ID_WIDTH ( AxiIw ), + .AXI_USER_WIDTH( AxiUw ) + ) dma_slave_iw(); AXI_BUS #( - .AXI_ADDR_WIDTH( AxiAw ), - .AXI_DATA_WIDTH( AxiDw ), - .AXI_ID_WIDTH ( AxiIw-2 ), - .AXI_USER_WIDTH( AxiUw ) + .AXI_ADDR_WIDTH( AxiAw ), + .AXI_DATA_WIDTH( AxiDw ), + .AXI_ID_WIDTH ( AxiIw-2 ), + .AXI_USER_WIDTH( AxiUw ) ) soc_to_cluster_axi_bus(); AXI_BUS_ASYNC_GRAY #( @@ -130,12 +166,20 @@ module pulp_cluster_tb; ) async_soc_to_cluster_axi_bus(); AXI_BUS_ASYNC_GRAY #( - .AXI_ADDR_WIDTH ( AxiAw 
), - .AXI_DATA_WIDTH ( AxiDw ), - .AXI_ID_WIDTH ( AxiIw ), - .AXI_USER_WIDTH ( AxiUw ), - .LOG_DEPTH ( 3 ) + .AXI_ADDR_WIDTH ( AxiAw ), + .AXI_DATA_WIDTH ( AxiDw ), + .AXI_ID_WIDTH ( AxiIw ), + .AXI_USER_WIDTH ( AxiUw ), + .LOG_DEPTH ( 3 ) ) async_cluster_to_soc_axi_bus(); + + AXI_BUS_ASYNC_GRAY #( + .AXI_ADDR_WIDTH ( AxiAw ), + .AXI_DATA_WIDTH ( DmaAxiDw ), + .AXI_ID_WIDTH ( DmaAxiIw ), + .AXI_USER_WIDTH ( AxiUw ), + .LOG_DEPTH ( 3 ) + ) async_dma_axi_bus(); // Behavioural slaves axi_m_req_t axi_memreq; @@ -144,6 +188,38 @@ module pulp_cluster_tb; `AXI_ASSIGN_TO_REQ(axi_memreq, axi_master[1]) `AXI_ASSIGN_FROM_RESP(axi_master[1], axi_memrsp) + axi_dw_converter_intf #( + .AXI_ID_WIDTH ( AxiIw ), + .AXI_ADDR_WIDTH ( AxiAw ), + .AXI_SLV_PORT_DATA_WIDTH ( DmaAxiDw ), + .AXI_MST_PORT_DATA_WIDTH ( AxiDw ), + .AXI_USER_WIDTH ( AxiUw ), + .AXI_MAX_READS ( 3 ) + ) i_dma_dw_conv ( + .clk_i ( s_clk ), + .rst_ni ( s_rstn ), + .slv ( dma_slave_iw ), + .mst ( axi_slave[2] ) + ); + + axi_iw_converter_intf #( + .AXI_SLV_PORT_ID_WIDTH ( DmaAxiIw ), + .AXI_MST_PORT_ID_WIDTH ( AxiIw ), + .AXI_SLV_PORT_MAX_UNIQ_IDS ( 5 ), + .AXI_SLV_PORT_MAX_TXNS_PER_ID ( 5 ), + .AXI_SLV_PORT_MAX_TXNS ( 5 ), + .AXI_MST_PORT_MAX_UNIQ_IDS ( 5 ), + .AXI_MST_PORT_MAX_TXNS_PER_ID ( 5 ), + .AXI_ADDR_WIDTH ( AxiAw ), + .AXI_DATA_WIDTH ( DmaAxiDw ), + .AXI_USER_WIDTH ( AxiUw ) + ) i_dma_iw_conv ( + .clk_i ( s_clk ), + .rst_ni ( s_rstn ), + .slv ( dma_slave ), + .mst ( dma_slave_iw ) + ); + axi_sim_mem #( .AddrWidth ( AxiAw ), .DataWidth ( AxiDw ), @@ -279,13 +355,26 @@ module pulp_cluster_tb; .AXI_ID_WIDTH ( AxiIw ), .AXI_USER_WIDTH ( AxiUw ), .LOG_DEPTH ( 3 ) - ) cluster_to_soc_dst_cdc_fifo_i ( + ) cluster_to_soc_dst_cdc_fifo_i ( .dst_clk_i ( s_clk ), .dst_rst_ni ( s_rstn ), .src ( async_cluster_to_soc_axi_bus ), .dst ( axi_slave[1] ) ); + axi_cdc_dst_intf #( + .AXI_ADDR_WIDTH ( AxiAw ), + .AXI_DATA_WIDTH ( DmaAxiDw ), + .AXI_ID_WIDTH ( DmaAxiIw ), + .AXI_USER_WIDTH ( AxiUw ), + .LOG_DEPTH ( 3 ) + ) 
cluster_to_soc_dma_dst_cdc_fifo_i ( + .dst_clk_i ( s_clk ), + .dst_rst_ni ( s_rstn ), + .src ( async_dma_axi_bus ), + .dst ( dma_slave ) + ); + localparam pulp_cluster_cfg_t PulpClusterCfg = '{ CoreType: pulp_cluster_package::RI5CY, NumCores: `NB_CORES, @@ -331,10 +420,12 @@ module pulp_cluster_tb; NumAxiIn: NumAxiSubordinatePorts, NumAxiOut: NumAxiManagerPorts, AxiIdInWidth: AxiIw-2, - AxiIdOutWidth:AxiIw, + AxiIdOutWidth: AxiIw, + AxiIdOutWideWidth: 1, AxiAddrWidth: AxiAw, AxiDataInWidth: AxiDw, AxiDataOutWidth: AxiDw, + AxiDataOutWideWidth: DmaAxiDw, AxiUserWidth: AxiUw, AxiMaxInTrans: 64, AxiMaxOutTrans: 64, @@ -362,6 +453,7 @@ module pulp_cluster_tb; .ref_clk_i ( s_clk ), .axi_isolate_i ( '0 ), .axi_isolated_o ( ), + .axi_isolated_wide_o ( ), .pmu_mem_pwdn_i ( 1'b0 ), @@ -406,6 +498,22 @@ module pulp_cluster_tb; .async_data_master_b_rptr_o ( async_cluster_to_soc_axi_bus.b_rptr ), .async_data_master_b_data_i ( async_cluster_to_soc_axi_bus.b_data ), + .async_wide_master_aw_wptr_o ( async_dma_axi_bus.aw_wptr ), + .async_wide_master_aw_rptr_i ( async_dma_axi_bus.aw_rptr ), + .async_wide_master_aw_data_o ( async_dma_axi_bus.aw_data ), + .async_wide_master_ar_wptr_o ( async_dma_axi_bus.ar_wptr ), + .async_wide_master_ar_rptr_i ( async_dma_axi_bus.ar_rptr ), + .async_wide_master_ar_data_o ( async_dma_axi_bus.ar_data ), + .async_wide_master_w_data_o ( async_dma_axi_bus.w_data ), + .async_wide_master_w_wptr_o ( async_dma_axi_bus.w_wptr ), + .async_wide_master_w_rptr_i ( async_dma_axi_bus.w_rptr ), + .async_wide_master_r_wptr_i ( async_dma_axi_bus.r_wptr ), + .async_wide_master_r_rptr_o ( async_dma_axi_bus.r_rptr ), + .async_wide_master_r_data_i ( async_dma_axi_bus.r_data ), + .async_wide_master_b_wptr_i ( async_dma_axi_bus.b_wptr ), + .async_wide_master_b_rptr_o ( async_dma_axi_bus.b_rptr ), + .async_wide_master_b_data_i ( async_dma_axi_bus.b_data ), + .async_data_slave_aw_wptr_i ( async_soc_to_cluster_axi_bus.aw_wptr ), .async_data_slave_aw_rptr_o ( 
async_soc_to_cluster_axi_bus.aw_rptr ), .async_data_slave_aw_data_i ( async_soc_to_cluster_axi_bus.aw_data ), From a84e126492c23cb81000b7234dd4da2a33b7d0ea Mon Sep 17 00:00:00 2001 From: Sergio Mazzola Date: Tue, 25 Feb 2025 19:58:26 +0100 Subject: [PATCH 06/67] hw,tb: Share HCI shallow intc between (wide) DMA and HWPEs --- include/pulp_soc_defines.sv | 2 +- packages/pulp_cluster_package.sv | 5 + rtl/cluster_interconnect_wrap.sv | 107 ++++-- rtl/idma_wrap.sv | 539 ++++++++++++++++++------------- rtl/pulp_cluster.sv | 63 +++- tb/pulp_cluster_tb.sv | 5 +- 6 files changed, 465 insertions(+), 256 deletions(-) diff --git a/include/pulp_soc_defines.sv b/include/pulp_soc_defines.sv index 965fef4a..44eacd4f 100644 --- a/include/pulp_soc_defines.sv +++ b/include/pulp_soc_defines.sv @@ -48,6 +48,6 @@ //PARAMETRES `define NB_CLUSTERS 1 `define NB_CORES 8 -`define NB_DMAS 4 +`define NB_DMAS 2 `define NB_MPERIPHS 1 `define NB_SPERIPHS 12 diff --git a/packages/pulp_cluster_package.sv b/packages/pulp_cluster_package.sv index b47e68c2..05a6e60d 100644 --- a/packages/pulp_cluster_package.sv +++ b/packages/pulp_cluster_package.sv @@ -53,11 +53,15 @@ package pulp_cluster_package; // Number of cores in the cluster byte_t NumCores; // Number of DMA TCDM plugs + // If using MCHAN, must be 4. 
If using iDMA, can vary in multiples of 2 or 3 byte_t DmaNumPlugs; // Number of DMA outstanding transactions byte_t DmaNumOutstandingBursts; // DMA burst length in bits word_t DmaBurstLength; + // If the DMA should use a HWPE port, set to 1 + // This makes sense only when using a DMA_TCDM_DATA_WIDTH close to the HWPE port width + bit DmaUseHwpePort; // Number of masters in crossbar peripherals byte_t NumMstPeriphs; // Number of slaves in crossbar peripherals @@ -213,6 +217,7 @@ package pulp_cluster_package; DmaNumPlugs: NumDmas, DmaNumOutstandingBursts: 8, DmaBurstLength: 256, + DmaUseHwpePort: 0, NumMstPeriphs: NB_MPERIPHS, NumSlvPeriphs: NB_SPERIPHS, ClusterAlias: 1, diff --git a/rtl/cluster_interconnect_wrap.sv b/rtl/cluster_interconnect_wrap.sv index 51b5dd15..d7332a06 100644 --- a/rtl/cluster_interconnect_wrap.sv +++ b/rtl/cluster_interconnect_wrap.sv @@ -21,8 +21,10 @@ import hci_package::*; module cluster_interconnect_wrap #( parameter int unsigned NB_CORES = 8, - parameter int unsigned HWPE_PRESENT = 1, - parameter int unsigned NB_HWPE_PORTS = 9, + parameter int unsigned NB_HWPE = 1, + // how many times wider a HWPE port is than DATA_WIDTH + parameter int unsigned HWPE_WIDTH_FAC = 4, + parameter bit DMA_USE_HWPE_PORT = 1'b0, parameter int unsigned NB_DMAS = 4, parameter int unsigned NB_MPERIPHS = 1, parameter int unsigned NB_TCDM_BANKS = 16, @@ -31,6 +33,7 @@ module cluster_interconnect_wrap parameter int unsigned DATA_WIDTH = 32, parameter int unsigned ADDR_WIDTH = 32, parameter int unsigned BE_WIDTH = DATA_WIDTH/8, + parameter int unsigned TCDM_ID_WIDTH = NB_DMAS + NB_CORES + 4 + HWPE_WIDTH_FAC, parameter logic [ADDR_WIDTH-1:0] ClusterBaseAddr = 'h10000000, parameter logic [ADDR_WIDTH-1:0] ClusterPeripheralsOffs = 'h00200000, parameter logic [ADDR_WIDTH-1:0] ClusterExternalOffs = 'h00400000, @@ -48,6 +51,7 @@ module cluster_interconnect_wrap parameter int unsigned USE_ECC_INTERCONNECT = 0, parameter hci_package::hci_size_parameter_t HCI_CORE_SIZE = '0, parameter
hci_package::hci_size_parameter_t HCI_HWPE_SIZE = '0, + parameter hci_package::hci_size_parameter_t HCI_DMA_SIZE = '0, parameter hci_package::hci_size_parameter_t HCI_MEM_SIZE = '0 ) ( @@ -56,7 +60,7 @@ module cluster_interconnect_wrap input logic [5:0] cluster_id_i, XBAR_PERIPH_BUS.Slave hci_ecc_periph_slave, hci_core_intf.target core_tcdm_slave [0 : NB_CORES-1 ], - hci_core_intf.target hwpe_tcdm_slave [0 : 0 ], + hci_core_intf.target hwpe_tcdm_slave [0 : NB_HWPE-1 ], XBAR_PERIPH_BUS.Slave core_periph_slave [NB_CORES-1 : 0 ], hci_core_intf.target ext_slave [0 : 3 ], hci_core_intf.target dma_slave [0 : NB_DMAS-1 ], @@ -67,7 +71,9 @@ module cluster_interconnect_wrap input logic [1:0] TCDM_arb_policy_i ); - localparam TCDM_ID_WIDTH = NB_CORES+NB_DMAS+4+NB_HWPE_PORTS; + // if DMA uses HWPE ports, ID width must be increased correspondingly + localparam N_HCI_DMA_PORTS = DMA_USE_HWPE_PORT ? 0 : NB_DMAS; + localparam N_HCI_HWPE_PORTS = DMA_USE_HWPE_PORT ? NB_HWPE + NB_DMAS : NB_HWPE; //-******************************************************** @@ -75,12 +81,63 @@ module cluster_interconnect_wrap //-******************************************************** // Wraps the Logarithmic Interconnect + a HWPE Interconnect generate - if( USE_HETEROGENEOUS_INTERCONNECT || !HWPE_PRESENT ) begin : hci_gen + if( USE_HETEROGENEOUS_INTERCONNECT || (NB_HWPE == 0) ) begin : hci_gen + // HWPEs interface + hci_core_intf #( + .DW(HCI_HWPE_SIZE.DW), + .AW(HCI_HWPE_SIZE.AW), + .BW(HCI_HWPE_SIZE.BW), + .UW(HCI_HWPE_SIZE.UW), + .IW(HCI_HWPE_SIZE.IW), + .EW(HCI_HWPE_SIZE.EW), + .EHW(HCI_HWPE_SIZE.EHW) + ) + s_hwpe_intc [0:N_HCI_HWPE_PORTS-1] ( + .clk(clk_i) + ); + // DMA interface (only if !DMA_USE_HWPE_PORT) + hci_core_intf #( + .DW(HCI_CORE_SIZE.DW), + .AW(HCI_CORE_SIZE.AW), + .BW(HCI_HWPE_SIZE.BW), + .UW(HCI_HWPE_SIZE.UW), + .IW(HCI_HWPE_SIZE.IW), + .EW(HCI_HWPE_SIZE.EW), + .EHW(HCI_CORE_SIZE.EHW) + ) + s_dma_intc [0:N_HCI_DMA_PORTS-1] ( + .clk(clk_i) + ); + // assign HWPE interfaces to 
s_hwpe_intc[:] + for (genvar i=0; i 1) begin + assert(!USE_ECC_INTERCONNECT) + else $fatal(1, "If USE_ECC_INTERCONNECT is enabled, N_HCI_HWPE_PORTS cannot be more than 1."); + end + end + `endif + // pragma translate_on endmodule diff --git a/rtl/idma_wrap.sv b/rtl/idma_wrap.sv index bcfc0268..fe5cc8c0 100644 --- a/rtl/idma_wrap.sv +++ b/rtl/idma_wrap.sv @@ -3,7 +3,7 @@ // SPDX-License-Identifier: SHL-0.51 /* - * idma_wrap.sv + * dmac_wrap.sv * Thomas Benz * Michael Rogenmoser * Georg Rutishauser @@ -17,7 +17,7 @@ `include "idma/typedef.svh" `include "register_interface/typedef.svh" -`define MY_MAX(a,b) (a > b ? a : b) +`define MY_MAX(a, b) (a > b ? a : b) module dmac_wrap #( parameter int unsigned NB_CORES = 4, @@ -39,8 +39,12 @@ module dmac_wrap #( parameter int unsigned GLOBAL_QUEUE_DEPTH = 2, // mux read ports between tcdm-tcdm and tcdm-axi? parameter bit MUX_READ = 1'b0, + parameter bit TCDM_MEM2BANKS = 1'b0, + // when using mem2banks (implies AXI_DATA_WIDTH==64): // 4 ports per stream if read ports muxed, otherwise 6 - localparam int unsigned NB_TCDM_PORTS_PER_STRM = 4 + (!MUX_READ) * 2 + // when not using mem2banks: + // 2 ports per stream if read ports muxed, otherwise 3 + localparam int unsigned NB_TCDM_PORTS_PER_STRM = (2 + (!MUX_READ)) * (1 + TCDM_MEM2BANKS) ) ( // verilog_format: off // verible does not manage to align this :( input logic clk_i, input logic rst_ni, @@ -55,7 +59,7 @@ module dmac_wrap #( output logic [NB_PE_PORTS-1:0] term_event_pe_o, output logic [NB_PE_PORTS-1:0] term_irq_pe_o, output logic busy_o -); // verilog_format: on +); // verilog_format: on localparam int unsigned NumRegs = NB_CORES + NB_PE_PORTS; localparam int unsigned NumStreams = 32'd2 * NUM_BIDIR_STREAMS; @@ -104,11 +108,11 @@ module dmac_wrap #( // Types types typedef logic [AXI_ADDR_WIDTH-1:0] addr_t; - typedef logic [ADDR_WIDTH-1:0] mem_addr_t; + typedef logic [ADDR_WIDTH-1:0] mem_addr_t; typedef logic [AXI_DATA_WIDTH-1:0] data_t; - typedef logic [AXI_ID_WIDTH-1:0] id_t; 
+ typedef logic [AXI_ID_WIDTH-1:0] id_t; typedef logic [AXI_DATA_WIDTH/8-1:0] strb_t; - typedef logic [AXI_USER_WIDTH-1:0] user_t; + typedef logic [AXI_USER_WIDTH-1:0] user_t; // // AXI4+ATOP channels typedefs //`AXI_TYPEDEF_ALL(axi_int, addr_t, id_t, data_t, strb_t, user_t) @@ -120,10 +124,10 @@ module dmac_wrap #( // Memory Init typedefs /// init read request typedef struct packed { - logic [AXI_ADDR_WIDTH-1:0] cfg; - logic [AXI_DATA_WIDTH-1:0] term; - logic [AXI_DATA_WIDTH/8-1:0] strb; - logic [AXI_ID_WIDTH-1:0] id; + logic [AXI_ADDR_WIDTH-1:0] cfg; + logic [AXI_DATA_WIDTH-1:0] term; + logic [AXI_DATA_WIDTH/8-1:0] strb; + logic [AXI_ID_WIDTH-1:0] id; } init_req_chan_t; typedef struct packed { @@ -150,9 +154,21 @@ module dmac_wrap #( obi_req_t [NUM_BIDIR_STREAMS-1:0] - obi_read_req_from_dma, obi_read_req_from_rrc, obi_reorg_req_from_dma, obi_reorg_req_from_rrc, obi_write_req_from_dma, obi_write_req_from_rrc, obi_read_req_muxed; + obi_read_req_from_dma, + obi_read_req_from_rrc, + obi_reorg_req_from_dma, + obi_reorg_req_from_rrc, + obi_write_req_from_dma, + obi_write_req_from_rrc, + obi_read_req_muxed; obi_rsp_t [NUM_BIDIR_STREAMS-1:0] - obi_read_rsp_to_dma, obi_read_rsp_to_rrc, obi_reorg_rsp_to_dma, obi_reorg_rsp_to_rrc, obi_write_rsp_to_dma, obi_write_rsp_to_rrc, obi_read_rsp_to_mux; + obi_read_rsp_to_dma, + obi_read_rsp_to_rrc, + obi_reorg_rsp_to_dma, + obi_reorg_rsp_to_rrc, + obi_write_rsp_to_dma, + obi_write_rsp_to_rrc, + obi_read_rsp_to_mux; // BUS definitions @@ -187,7 +203,7 @@ module dmac_wrap #( // Register BUS definitions localparam int unsigned RegAddrWidth = 32'd10; `REG_BUS_TYPEDEF_ALL(dma_regs, logic[RegAddrWidth-1:0], logic[DATA_WIDTH-1:0], - logic[BE_WIDTH-1:0]) + logic[BE_WIDTH-1:0]) dma_regs_req_t [NumRegs-1:0] dma_regs_req; dma_regs_rsp_t [NumRegs-1:0] dma_regs_rsp; @@ -197,7 +213,7 @@ module dmac_wrap #( localparam int unsigned RepWidth = 32'd32; localparam int unsigned StrideWidth = 32'd32; typedef logic [TFLenWidth-1:0] tf_len_t; - typedef 
logic [RepWidth-1:0] reps_t; + typedef logic [RepWidth-1:0] reps_t; typedef logic [StrideWidth-1:0] strides_t; // iDMA request / response types @@ -214,9 +230,9 @@ module dmac_wrap #( idma_rsp_t [NumStreams-1:0] idma_rsp; logic one_fe_valid; - logic [NumStreams-1:0] fe_valid, twod_queue_valid, be_valid, be_rsp_valid; - logic [NumStreams-1:0] fe_ready, twod_queue_ready, be_ready, be_rsp_ready; - logic [NumStreams-1:0] trans_complete, midend_busy; + logic [NumStreams-1:0] fe_valid, twod_queue_valid, be_valid, be_rsp_valid; + logic [NumStreams-1:0] fe_ready, twod_queue_ready, be_ready, be_rsp_ready; + logic [NumStreams-1:0] trans_complete, midend_busy; idma_pkg::idma_busy_t [NumStreams-1:0] idma_busy; logic [31:0][NumStreams-1:0] done_id, next_id; @@ -361,7 +377,7 @@ module dmac_wrap #( // Meta Channel Widths localparam int unsigned axi_aw_chan_width = axi_pkg::aw_width( - AXI_ADDR_WIDTH, AXI_ID_WIDTH, AXI_USER_WIDTH + AXI_ADDR_WIDTH, AXI_ID_WIDTH, AXI_USER_WIDTH ); localparam int unsigned init_req_chan_width = $bits(init_req_chan_t); localparam int unsigned obi_a_chan_width = $bits(obi_a_chan_t); @@ -454,31 +470,29 @@ module dmac_wrap #( // use a spill register to only give responses when a request was // (or is) asserted spill_register #( - .T(logic[-1:0]) - ) - i_init_read_rsp_reflect ( + .T(logic [-1:0]) + ) i_init_read_rsp_reflect ( .clk_i, .rst_ni, .valid_i(init_read_req.req_valid), .ready_o(init_read_rsp.req_ready), - .data_i('0), // not used + .data_i('0), // not used .valid_o(init_read_rsp.rsp_valid), .ready_i(init_read_req.rsp_ready), .data_o() ); //implement zero memory using init protocol - assign init_read_rsp.rsp_chan.init = '0; + assign init_read_rsp.rsp_chan.init = '0; // implement /dev/null spill_register #( - .T(logic[-1:0]) - ) - i_init_write_rsp_reflect ( + .T(logic [-1:0]) + ) i_init_write_rsp_reflect ( .clk_i, .rst_ni, .valid_i(init_write_req.req_valid), .ready_o(init_write_rsp.req_ready), - .data_i('0), // not used + .data_i('0), // not used 
.valid_o(init_write_rsp.rsp_valid), .ready_i(init_write_req.rsp_ready), .data_o() @@ -491,7 +505,7 @@ module dmac_wrap #( // Meta Channel Widths localparam int unsigned axi_ar_chan_width = axi_pkg::ar_width( - AXI_ADDR_WIDTH, AXI_ID_WIDTH, AXI_USER_WIDTH + AXI_ADDR_WIDTH, AXI_ID_WIDTH, AXI_USER_WIDTH ); localparam int unsigned init_req_chan_width = $bits(init_req_chan_t); localparam int unsigned obi_a_chan_width = $bits(obi_a_chan_t); @@ -502,19 +516,25 @@ module dmac_wrap #( typedef struct packed { axi_ar_chan_t ar_chan; - logic [`MY_MAX( -axi_ar_chan_width, `MY_MAX(init_req_chan_width, obi_a_chan_width) -)-axi_ar_chan_width:0] padding; + logic [ + `MY_MAX( + axi_ar_chan_width, `MY_MAX(init_req_chan_width, obi_a_chan_width) + ) + -axi_ar_chan_width:0] padding; } axi_read_ar_chan_padded_t; typedef struct packed { init_req_chan_t req_chan; - logic [`MY_MAX(axi_ar_chan_width, `MY_MAX(init_req_chan_width, obi_a_chan_width))-init_req_chan_width:0] padding; + logic [ + `MY_MAX(axi_ar_chan_width, `MY_MAX(init_req_chan_width, obi_a_chan_width)) + -init_req_chan_width:0] padding; } init_read_req_chan_padded_t; typedef struct packed { obi_a_chan_t a_chan; - logic [`MY_MAX(axi_ar_chan_width, `MY_MAX(init_req_chan_width, obi_a_chan_width))-obi_a_chan_width:0] padding; + logic [ + `MY_MAX(axi_ar_chan_width, `MY_MAX(init_req_chan_width, obi_a_chan_width)) + -obi_a_chan_width:0] padding; } obi_read_a_chan_padded_t; typedef union packed { @@ -597,30 +617,28 @@ axi_ar_chan_width, `MY_MAX(init_req_chan_width, obi_a_chan_width) // use a spill register to only give responses when a request was // (or is) asserted spill_register #( - .T(logic[-1:0]) - ) - i_init_read_rsp_reflect ( + .T(logic [-1:0]) + ) i_init_read_rsp_reflect ( .clk_i, .rst_ni, .valid_i(init_read_req.req_valid), .ready_o(init_read_rsp.req_ready), - .data_i('0), // not used + .data_i('0), // not used .valid_o(init_read_rsp.rsp_valid), .ready_i(init_read_req.rsp_ready), .data_o() ); //implement zero memory using init 
protocol - assign init_read_rsp.rsp_chan.init = '0; + assign init_read_rsp.rsp_chan.init = '0; // implement /dev/null spill_register #( - .T(logic[-1:0]) - ) - i_init_write_rsp_reflect ( + .T(logic [-1:0]) + ) i_init_write_rsp_reflect ( .clk_i, .rst_ni, .valid_i(init_write_req.req_valid), .ready_o(init_write_rsp.req_ready), - .data_i('0), // not used + .data_i('0), // not used .valid_o(init_write_rsp.rsp_valid), .ready_i(init_write_req.rsp_ready), .data_o() @@ -642,11 +660,11 @@ axi_ar_chan_width, `MY_MAX(init_req_chan_width, obi_a_chan_width) DataWidth: AXI_DATA_WIDTH, IdWidth: 0, Integrity: 1'b0, - BeFull: 1'b1, + BeFull: 1'b1, OptionalCfg: obi_pkg::ObiMinimalOptionalConfig }; - // iDMA OBI + // iDMA OBI obi_mux #( .SbrPortObiCfg (sbr_obi_cfg), @@ -670,18 +688,17 @@ axi_ar_chan_width, `MY_MAX(init_req_chan_width, obi_a_chan_width) .mgr_port_rsp_i (obi_read_rsp_to_mux[s]) ); assign obi_reorg_req_from_rrc = '0; - assign obi_reorg_rsp_to_rrc = '0; + assign obi_reorg_rsp_to_rrc = '0; end else begin // if (MUX_READ) // pass through the read req/rsp from/to dma assign obi_read_req_muxed = obi_read_req_from_dma; assign obi_read_rsp_to_dma = obi_read_rsp_to_mux; - + obi_rready_converter #( .obi_a_chan_t(obi_a_chan_t), .obi_r_chan_t(obi_r_chan_t), .DEPTH(1) - ) - obi_rready_converter_reorg_i ( + ) obi_rready_converter_reorg_i ( .clk_i, .rst_ni, .test_mode_i, @@ -698,54 +715,53 @@ axi_ar_chan_width, `MY_MAX(init_req_chan_width, obi_a_chan_width) .gnt_i(obi_reorg_rsp_to_rrc[s].gnt), .rvalid_i(obi_reorg_rsp_to_rrc[s].rvalid) ); - end // else: !if(MUX_READ) - - obi_rready_converter #( - .obi_a_chan_t(obi_a_chan_t), - .obi_r_chan_t(obi_r_chan_t), - .DEPTH(1) - ) - obi_rready_converter_read_i ( - .clk_i, - .rst_ni, - .test_mode_i, - .sbr_a_chan_i(obi_read_req_muxed[s].a), - .req_i(obi_read_req_muxed[s].req), - .gnt_o(obi_read_rsp_to_mux[s].gnt), - .rready_i(obi_read_req_muxed[s].rready), - .sbr_r_chan_o(obi_read_rsp_to_mux[s].r), - .rvalid_o(obi_read_rsp_to_mux[s].rvalid), - 
.mgr_a_chan_o(obi_read_req_from_rrc[s].a), - .req_o(obi_read_req_from_rrc[s].req), - .rready_o(obi_read_req_from_rrc[s].rready), - .mgr_r_chan_i(obi_read_rsp_to_rrc[s].r), - .gnt_i(obi_read_rsp_to_rrc[s].gnt), - .rvalid_i(obi_read_rsp_to_rrc[s].rvalid) - ); + end // else: !if(MUX_READ) + obi_rready_converter #( + .obi_a_chan_t(obi_a_chan_t), + .obi_r_chan_t(obi_r_chan_t), + .DEPTH(1) + ) obi_rready_converter_read_i ( + .clk_i, + .rst_ni, + .test_mode_i, + .sbr_a_chan_i(obi_read_req_muxed[s].a), + .req_i(obi_read_req_muxed[s].req), + .gnt_o(obi_read_rsp_to_mux[s].gnt), + .rready_i(obi_read_req_muxed[s].rready), + .sbr_r_chan_o(obi_read_rsp_to_mux[s].r), + .rvalid_o(obi_read_rsp_to_mux[s].rvalid), + .mgr_a_chan_o(obi_read_req_from_rrc[s].a), + .req_o(obi_read_req_from_rrc[s].req), + .rready_o(obi_read_req_from_rrc[s].rready), + .mgr_r_chan_i(obi_read_rsp_to_rrc[s].r), + .gnt_i(obi_read_rsp_to_rrc[s].gnt), + .rvalid_i(obi_read_rsp_to_rrc[s].rvalid) + ); - obi_rready_converter #( - .obi_a_chan_t(obi_a_chan_t), - .obi_r_chan_t(obi_r_chan_t), - .DEPTH(1) - ) - obi_rready_converter_wr_i ( - .clk_i, - .rst_ni, - .test_mode_i, - .sbr_a_chan_i(obi_write_req_from_dma[s].a), - .req_i(obi_write_req_from_dma[s].req), - .gnt_o(obi_write_rsp_to_dma[s].gnt), - .rready_i(obi_write_req_from_dma[s].rready), - .sbr_r_chan_o(obi_write_rsp_to_dma[s].r), - .rvalid_o(obi_write_rsp_to_dma[s].rvalid), - .mgr_a_chan_o(obi_write_req_from_rrc[s].a), - .req_o(obi_write_req_from_rrc[s].req), - .rready_o(obi_write_req_from_rrc[s].rready), - .mgr_r_chan_i(obi_write_rsp_to_rrc[s].r), - .gnt_i(obi_write_rsp_to_rrc[s].gnt), - .rvalid_i(obi_write_rsp_to_rrc[s].rvalid) - ); + + + obi_rready_converter #( + .obi_a_chan_t(obi_a_chan_t), + .obi_r_chan_t(obi_r_chan_t), + .DEPTH(1) + ) obi_rready_converter_wr_i ( + .clk_i, + .rst_ni, + .test_mode_i, + .sbr_a_chan_i(obi_write_req_from_dma[s].a), + .req_i(obi_write_req_from_dma[s].req), + .gnt_o(obi_write_rsp_to_dma[s].gnt), + 
.rready_i(obi_write_req_from_dma[s].rready), + .sbr_r_chan_o(obi_write_rsp_to_dma[s].r), + .rvalid_o(obi_write_rsp_to_dma[s].rvalid), + .mgr_a_chan_o(obi_write_req_from_rrc[s].a), + .req_o(obi_write_req_from_rrc[s].req), + .rready_o(obi_write_req_from_rrc[s].rready), + .mgr_r_chan_i(obi_write_rsp_to_rrc[s].r), + .gnt_i(obi_write_rsp_to_rrc[s].gnt), + .rvalid_i(obi_write_rsp_to_rrc[s].rvalid) + ); end @@ -753,67 +769,78 @@ axi_ar_chan_width, `MY_MAX(init_req_chan_width, obi_a_chan_width) // TCDM connections // ------------------------------------------------------ for (genvar s = 0; s < NUM_BIDIR_STREAMS; s++) begin + if (TCDM_MEM2BANKS) begin : tcdm_mem2banks + // Currently, mem2banks only implemented for AXI_DATA_WIDTH==64 + // TODO: parametrize so it works for arbitrary data widths + initial begin : mem2banks_check_axi_width + if (AXI_DATA_WIDTH != 64) begin + $error("idma_wrap: AXI_DATA_WIDTH must be 64 when TCDM_MEM2BANKS is 1!"); + end + end + + logic tcdm_master_we_0; + logic tcdm_master_we_1; + logic tcdm_master_we_2; + logic tcdm_master_we_3; + logic tcdm_master_we_4; + logic tcdm_master_we_5; - logic tcdm_master_we_0; - logic tcdm_master_we_1; - logic tcdm_master_we_2; - logic tcdm_master_we_3; - logic tcdm_master_we_4; - logic tcdm_master_we_5; - - mem_to_banks #( - .AddrWidth(AXI_ADDR_WIDTH), - .DataWidth(AXI_DATA_WIDTH), - .NumBanks (32'd2), - .HideStrb (1'b1), - .MaxTrans (32'd1), - .FifoDepth(32'd1) - ) i_mem_to_banks_read ( - .clk_i, - .rst_ni, - .req_i(obi_read_req_from_rrc[s].req), - .gnt_o(obi_read_rsp_to_rrc[s].gnt), - .addr_i(obi_read_req_from_rrc[s].a.addr), - .wdata_i(obi_read_req_from_rrc[s].a.wdata), - .strb_i(obi_read_req_from_rrc[s].a.be), - .atop_i('0), - .we_i(obi_read_req_from_rrc[s].a.we), - .rvalid_o(obi_read_rsp_to_rrc[s].rvalid), - .rdata_o(obi_read_rsp_to_rrc[s].r.rdata), - .bank_req_o({ - tcdm_master[NB_TCDM_PORTS_PER_STRM*s+3].req, tcdm_master[NB_TCDM_PORTS_PER_STRM*s+2].req + mem_to_banks #( + .AddrWidth(AXI_ADDR_WIDTH), + 
.DataWidth(AXI_DATA_WIDTH), + .NumBanks (32'd2), + .HideStrb (1'b1), + .MaxTrans (32'd1), + .FifoDepth(32'd1) + ) i_mem_to_banks_write ( + .clk_i, + .rst_ni, + .req_i(obi_write_req_from_rrc[s].req), + .gnt_o(obi_write_rsp_to_rrc[s].gnt), + .addr_i(obi_write_req_from_rrc[s].a.addr), + .wdata_i(obi_write_req_from_rrc[s].a.wdata), + .strb_i(obi_write_req_from_rrc[s].a.be), + .atop_i('0), + .we_i(obi_write_req_from_rrc[s].a.we), + .rvalid_o(obi_write_rsp_to_rrc[s].rvalid), + .rdata_o(obi_write_rsp_to_rrc[s].r.rdata), + .bank_req_o({ + tcdm_master[NB_TCDM_PORTS_PER_STRM*s+1].req, tcdm_master[NB_TCDM_PORTS_PER_STRM*s].req }), - .bank_gnt_i({ - tcdm_master[NB_TCDM_PORTS_PER_STRM*s+3].gnt, tcdm_master[NB_TCDM_PORTS_PER_STRM*s+2].gnt + .bank_gnt_i({ + tcdm_master[NB_TCDM_PORTS_PER_STRM*s+1].gnt, tcdm_master[NB_TCDM_PORTS_PER_STRM*s].gnt }), - .bank_addr_o({ - tcdm_master[NB_TCDM_PORTS_PER_STRM*s+3].add, tcdm_master[NB_TCDM_PORTS_PER_STRM*s+2].add + .bank_addr_o({ + tcdm_master[NB_TCDM_PORTS_PER_STRM*s+1].add, tcdm_master[NB_TCDM_PORTS_PER_STRM*s].add }), - .bank_wdata_o({ - tcdm_master[NB_TCDM_PORTS_PER_STRM*s+3].data, tcdm_master[NB_TCDM_PORTS_PER_STRM*s+2].data + .bank_wdata_o({ + tcdm_master[NB_TCDM_PORTS_PER_STRM*s+1].data, tcdm_master[NB_TCDM_PORTS_PER_STRM*s].data }), - .bank_strb_o({ - tcdm_master[NB_TCDM_PORTS_PER_STRM*s+3].be, tcdm_master[NB_TCDM_PORTS_PER_STRM*s+2].be + .bank_strb_o({ + tcdm_master[NB_TCDM_PORTS_PER_STRM*s+1].be, tcdm_master[NB_TCDM_PORTS_PER_STRM*s].be }), - .bank_atop_o( /* NOT CONNECTED */), - .bank_we_o({tcdm_master_we_3, tcdm_master_we_2}), - .bank_rvalid_i({ - tcdm_master[NB_TCDM_PORTS_PER_STRM*s+3].r_valid, - tcdm_master[NB_TCDM_PORTS_PER_STRM*s+2].r_valid + .bank_atop_o( /* NOT CONNECTED */), + .bank_we_o({tcdm_master_we_1, tcdm_master_we_0}), + .bank_rvalid_i({ + tcdm_master[NB_TCDM_PORTS_PER_STRM*s+1].r_valid, + tcdm_master[NB_TCDM_PORTS_PER_STRM*s].r_valid }), - .bank_rdata_i({ - tcdm_master[NB_TCDM_PORTS_PER_STRM*s+3].r_data, - 
tcdm_master[NB_TCDM_PORTS_PER_STRM*s+2].r_data + .bank_rdata_i({ + tcdm_master[NB_TCDM_PORTS_PER_STRM*s+1].r_data, tcdm_master[NB_TCDM_PORTS_PER_STRM*s].r_data }) - ); + ); - assign tcdm_master[NB_TCDM_PORTS_PER_STRM*s+2].user = '0; - assign tcdm_master[NB_TCDM_PORTS_PER_STRM*s+3].user = '0; - assign tcdm_master[NB_TCDM_PORTS_PER_STRM*s+2].wen = !tcdm_master_we_2; - assign tcdm_master[NB_TCDM_PORTS_PER_STRM*s+3].wen = !tcdm_master_we_3; + assign tcdm_master[NB_TCDM_PORTS_PER_STRM*s+0].user = '0; + assign tcdm_master[NB_TCDM_PORTS_PER_STRM*s+1].user = '0; + assign tcdm_master[NB_TCDM_PORTS_PER_STRM*s+0].wen = !tcdm_master_we_0; + assign tcdm_master[NB_TCDM_PORTS_PER_STRM*s+1].wen = !tcdm_master_we_1; + assign tcdm_master[NB_TCDM_PORTS_PER_STRM*s+0].r_ready = 1'b1; + assign tcdm_master[NB_TCDM_PORTS_PER_STRM*s+1].r_ready = 1'b1; + assign tcdm_master[NB_TCDM_PORTS_PER_STRM*s+0].id = '0; // TODO change? + assign tcdm_master[NB_TCDM_PORTS_PER_STRM*s+1].id = '0; + assign tcdm_master[NB_TCDM_PORTS_PER_STRM*s+0].ecc = '0; + assign tcdm_master[NB_TCDM_PORTS_PER_STRM*s+1].ecc = '0; - if (!MUX_READ) begin // if we don't mux the read, we have 6*NUM_BIDIR_STREAMS interfaces and the reorg - // interface goes straight to TCDM masters 5 and 4. 
mem_to_banks #( .AddrWidth(AXI_ADDR_WIDTH), .DataWidth(AXI_DATA_WIDTH), @@ -821,105 +848,165 @@ axi_ar_chan_width, `MY_MAX(init_req_chan_width, obi_a_chan_width) .HideStrb (1'b1), .MaxTrans (32'd1), .FifoDepth(32'd1) - ) i_mem_to_banks_reorg ( + ) i_mem_to_banks_read ( .clk_i, .rst_ni, - .req_i(obi_reorg_req_from_rrc[s].req), - .gnt_o(obi_reorg_rsp_to_rrc[s].gnt), - .addr_i(obi_reorg_req_from_rrc[s].a.addr), - .wdata_i(obi_reorg_req_from_rrc[s].a.wdata), - .strb_i(obi_reorg_req_from_rrc[s].a.be), + .req_i(obi_read_req_from_rrc[s].req), + .gnt_o(obi_read_rsp_to_rrc[s].gnt), + .addr_i(obi_read_req_from_rrc[s].a.addr), + .wdata_i(obi_read_req_from_rrc[s].a.wdata), + .strb_i(obi_read_req_from_rrc[s].a.be), .atop_i('0), - .we_i(obi_reorg_req_from_rrc[s].a.we), - .rvalid_o(obi_reorg_rsp_to_rrc[s].rvalid), - .rdata_o(obi_reorg_rsp_to_rrc[s].r.rdata), + .we_i(obi_read_req_from_rrc[s].a.we), + .rvalid_o(obi_read_rsp_to_rrc[s].rvalid), + .rdata_o(obi_read_rsp_to_rrc[s].r.rdata), .bank_req_o({ + tcdm_master[NB_TCDM_PORTS_PER_STRM*s+3].req, tcdm_master[NB_TCDM_PORTS_PER_STRM*s+2].req + }), + .bank_gnt_i({ + tcdm_master[NB_TCDM_PORTS_PER_STRM*s+3].gnt, tcdm_master[NB_TCDM_PORTS_PER_STRM*s+2].gnt + }), + .bank_addr_o({ + tcdm_master[NB_TCDM_PORTS_PER_STRM*s+3].add, tcdm_master[NB_TCDM_PORTS_PER_STRM*s+2].add + }), + .bank_wdata_o({ + tcdm_master[NB_TCDM_PORTS_PER_STRM*s+3].data, tcdm_master[NB_TCDM_PORTS_PER_STRM*s+2].data + }), + .bank_strb_o({ + tcdm_master[NB_TCDM_PORTS_PER_STRM*s+3].be, tcdm_master[NB_TCDM_PORTS_PER_STRM*s+2].be + }), + .bank_atop_o( /* NOT CONNECTED */), + .bank_we_o({tcdm_master_we_3, tcdm_master_we_2}), + .bank_rvalid_i({ + tcdm_master[NB_TCDM_PORTS_PER_STRM*s+3].r_valid, + tcdm_master[NB_TCDM_PORTS_PER_STRM*s+2].r_valid + }), + .bank_rdata_i({ + tcdm_master[NB_TCDM_PORTS_PER_STRM*s+3].r_data, + tcdm_master[NB_TCDM_PORTS_PER_STRM*s+2].r_data + }) + ); + + + //assign tcdm_master[NB_TCDM_PORTS_PER_STRM*s+2].boffs = '0; + //assign 
tcdm_master[NB_TCDM_PORTS_PER_STRM*s+2].lrdy = '0; + assign tcdm_master[NB_TCDM_PORTS_PER_STRM*s+2].user = '0; + //assign tcdm_master[NB_TCDM_PORTS_PER_STRM*s+3].boffs = '0; + //assign tcdm_master[NB_TCDM_PORTS_PER_STRM*s+3].lrdy = '0; + assign tcdm_master[NB_TCDM_PORTS_PER_STRM*s+3].user = '0; + assign tcdm_master[NB_TCDM_PORTS_PER_STRM*s+2].wen = !tcdm_master_we_2; + assign tcdm_master[NB_TCDM_PORTS_PER_STRM*s+3].wen = !tcdm_master_we_3; + assign tcdm_master[NB_TCDM_PORTS_PER_STRM*s+2].r_ready = 1'b1; + assign tcdm_master[NB_TCDM_PORTS_PER_STRM*s+3].r_ready = 1'b1; + assign tcdm_master[NB_TCDM_PORTS_PER_STRM*s+2].id = '0; // TODO change? + assign tcdm_master[NB_TCDM_PORTS_PER_STRM*s+3].id = '0; + assign tcdm_master[NB_TCDM_PORTS_PER_STRM*s+2].ecc = '0; + assign tcdm_master[NB_TCDM_PORTS_PER_STRM*s+3].ecc = '0; + + + if (!MUX_READ) begin // if we don't mux the read, we have 6*NUM_BIDIR_STREAMS interfaces and the reorg + // interface goes straight to TCDM masters 5 and 4. + mem_to_banks #( + .AddrWidth(AXI_ADDR_WIDTH), + .DataWidth(AXI_DATA_WIDTH), + .NumBanks (32'd2), + .HideStrb (1'b1), + .MaxTrans (32'd1), + .FifoDepth(32'd1) + ) i_mem_to_banks_reorg ( + .clk_i, + .rst_ni, + .req_i(obi_reorg_req_from_rrc[s].req), + .gnt_o(obi_reorg_rsp_to_rrc[s].gnt), + .addr_i(obi_reorg_req_from_rrc[s].a.addr), + .wdata_i(obi_reorg_req_from_rrc[s].a.wdata), + .strb_i(obi_reorg_req_from_rrc[s].a.be), + .atop_i('0), + .we_i(obi_reorg_req_from_rrc[s].a.we), + .rvalid_o(obi_reorg_rsp_to_rrc[s].rvalid), + .rdata_o(obi_reorg_rsp_to_rrc[s].r.rdata), + .bank_req_o({ tcdm_master[NB_TCDM_PORTS_PER_STRM*s+5].req, tcdm_master[NB_TCDM_PORTS_PER_STRM*s+4].req }), - .bank_gnt_i({ + .bank_gnt_i({ tcdm_master[NB_TCDM_PORTS_PER_STRM*s+5].gnt, tcdm_master[NB_TCDM_PORTS_PER_STRM*s+4].gnt }), - .bank_addr_o({ + .bank_addr_o({ tcdm_master[NB_TCDM_PORTS_PER_STRM*s+5].add, tcdm_master[NB_TCDM_PORTS_PER_STRM*s+4].add }), - .bank_wdata_o({ + .bank_wdata_o({ tcdm_master[NB_TCDM_PORTS_PER_STRM*s+5].data, 
tcdm_master[NB_TCDM_PORTS_PER_STRM*s+4].data }), - .bank_strb_o({ + .bank_strb_o({ tcdm_master[NB_TCDM_PORTS_PER_STRM*s+5].be, tcdm_master[NB_TCDM_PORTS_PER_STRM*s+4].be }), - .bank_atop_o( /* NOT CONNECTED */), - .bank_we_o({tcdm_master_we_5, tcdm_master_we_4}), - .bank_rvalid_i({ + .bank_atop_o( /* NOT CONNECTED */), + .bank_we_o({tcdm_master_we_5, tcdm_master_we_4}), + .bank_rvalid_i({ tcdm_master[NB_TCDM_PORTS_PER_STRM*s+5].r_valid, tcdm_master[NB_TCDM_PORTS_PER_STRM*s+4].r_valid }), - .bank_rdata_i({ + .bank_rdata_i({ tcdm_master[NB_TCDM_PORTS_PER_STRM*s+5].r_data, tcdm_master[NB_TCDM_PORTS_PER_STRM*s+4].r_data }) - ); - - assign tcdm_master[NB_TCDM_PORTS_PER_STRM*s+4].boffs = '0; - assign tcdm_master[NB_TCDM_PORTS_PER_STRM*s+4].lrdy = '0; - assign tcdm_master[NB_TCDM_PORTS_PER_STRM*s+4].user = '0; - assign tcdm_master[NB_TCDM_PORTS_PER_STRM*s+5].boffs = '0; - assign tcdm_master[NB_TCDM_PORTS_PER_STRM*s+5].lrdy = '0; - assign tcdm_master[NB_TCDM_PORTS_PER_STRM*s+5].user = '0; - assign tcdm_master[NB_TCDM_PORTS_PER_STRM*s+4].wen = !tcdm_master_we_4; - assign tcdm_master[NB_TCDM_PORTS_PER_STRM*s+5].wen = !tcdm_master_we_5; - + ); + + assign tcdm_master[NB_TCDM_PORTS_PER_STRM*s+4].boffs = '0; + assign tcdm_master[NB_TCDM_PORTS_PER_STRM*s+4].lrdy = '0; + assign tcdm_master[NB_TCDM_PORTS_PER_STRM*s+4].user = '0; + assign tcdm_master[NB_TCDM_PORTS_PER_STRM*s+5].boffs = '0; + assign tcdm_master[NB_TCDM_PORTS_PER_STRM*s+5].lrdy = '0; + assign tcdm_master[NB_TCDM_PORTS_PER_STRM*s+5].user = '0; + assign tcdm_master[NB_TCDM_PORTS_PER_STRM*s+4].wen = !tcdm_master_we_4; + assign tcdm_master[NB_TCDM_PORTS_PER_STRM*s+5].wen = !tcdm_master_we_5; + assign tcdm_master[NB_TCDM_PORTS_PER_STRM*s+4].r_ready = 1'b1; + assign tcdm_master[NB_TCDM_PORTS_PER_STRM*s+5].r_ready = 1'b1; + assign tcdm_master[NB_TCDM_PORTS_PER_STRM*s+4].id = '0; // TODO change? 
+ assign tcdm_master[NB_TCDM_PORTS_PER_STRM*s+5].id = '0; + assign tcdm_master[NB_TCDM_PORTS_PER_STRM*s+4].ecc = '0; + assign tcdm_master[NB_TCDM_PORTS_PER_STRM*s+5].ecc = '0; + end + end else begin : passthrough_obi_to_tcdm + assign tcdm_master[NB_TCDM_PORTS_PER_STRM*s].user = '0; + assign tcdm_master[NB_TCDM_PORTS_PER_STRM*s].req = obi_write_req_from_rrc[s].req; + assign tcdm_master[NB_TCDM_PORTS_PER_STRM*s].wen = !obi_write_req_from_rrc[s].a.we; + assign tcdm_master[NB_TCDM_PORTS_PER_STRM*s].add = obi_write_req_from_rrc[s].a.addr; + assign tcdm_master[NB_TCDM_PORTS_PER_STRM*s].data = obi_write_req_from_rrc[s].a.wdata; + assign tcdm_master[NB_TCDM_PORTS_PER_STRM*s].be = obi_write_req_from_rrc[s].a.be; + assign tcdm_master[NB_TCDM_PORTS_PER_STRM*s].r_ready = obi_write_req_from_rrc[s].rready; + assign tcdm_master[NB_TCDM_PORTS_PER_STRM*s].id = '0; // TODO change? + assign tcdm_master[NB_TCDM_PORTS_PER_STRM*s].ecc = '0; + assign obi_write_rsp_to_rrc[s].gnt = tcdm_master[NB_TCDM_PORTS_PER_STRM*s].gnt; + assign obi_write_rsp_to_rrc[s].rvalid = tcdm_master[NB_TCDM_PORTS_PER_STRM*s].r_valid; + assign obi_write_rsp_to_rrc[s].r.rdata = tcdm_master[NB_TCDM_PORTS_PER_STRM*s].r_data; + assign tcdm_master[NB_TCDM_PORTS_PER_STRM*s+1].user = '0; + assign tcdm_master[NB_TCDM_PORTS_PER_STRM*s+1].req = obi_read_req_from_rrc[s].req; + assign tcdm_master[NB_TCDM_PORTS_PER_STRM*s+1].wen = !obi_read_req_from_rrc[s].a.we; + assign tcdm_master[NB_TCDM_PORTS_PER_STRM*s+1].add = obi_read_req_from_rrc[s].a.addr; + assign tcdm_master[NB_TCDM_PORTS_PER_STRM*s+1].data = obi_read_req_from_rrc[s].a.wdata; + assign tcdm_master[NB_TCDM_PORTS_PER_STRM*s+1].be = obi_read_req_from_rrc[s].a.be; + assign tcdm_master[NB_TCDM_PORTS_PER_STRM*s+1].r_ready = obi_read_req_from_rrc[s].rready; + assign tcdm_master[NB_TCDM_PORTS_PER_STRM*s+1].id = '0; + assign tcdm_master[NB_TCDM_PORTS_PER_STRM*s+1].ecc = '0; + assign obi_read_rsp_to_rrc[s].gnt = tcdm_master[NB_TCDM_PORTS_PER_STRM*s+1].gnt; + assign 
obi_read_rsp_to_rrc[s].rvalid = tcdm_master[NB_TCDM_PORTS_PER_STRM*s+1].r_valid; + assign obi_read_rsp_to_rrc[s].r.rdata = tcdm_master[NB_TCDM_PORTS_PER_STRM*s+1].r_data; + if (!MUX_READ) begin : passthrough_obi_read + assign tcdm_master[NB_TCDM_PORTS_PER_STRM*s+2].req = obi_reorg_req_from_rrc[s].req; + assign tcdm_master[NB_TCDM_PORTS_PER_STRM*s+2].wen = !obi_reorg_req_from_rrc[s].a.we; + assign tcdm_master[NB_TCDM_PORTS_PER_STRM*s+2].add = obi_reorg_req_from_rrc[s].a.addr; + assign tcdm_master[NB_TCDM_PORTS_PER_STRM*s+2].data = obi_reorg_req_from_rrc[s].a.wdata; + assign tcdm_master[NB_TCDM_PORTS_PER_STRM*s+2].be = obi_reorg_req_from_rrc[s].a.be; + assign tcdm_master[NB_TCDM_PORTS_PER_STRM*s+2].r_ready = obi_read_req_from_rrc[s].rready; + assign tcdm_master[NB_TCDM_PORTS_PER_STRM*s+2].id = '0; + assign tcdm_master[NB_TCDM_PORTS_PER_STRM*s+2].ecc = '0; + assign obi_reorg_rsp_to_rrc[s].gnt = tcdm_master[NB_TCDM_PORTS_PER_STRM*s+2].gnt; + assign obi_reorg_rsp_to_rrc[s].rvalid = tcdm_master[NB_TCDM_PORTS_PER_STRM*s+2].r_valid; + assign obi_reorg_rsp_to_rrc[s].r.rdata = tcdm_master[NB_TCDM_PORTS_PER_STRM*s+2].r_data; + assign tcdm_master[NB_TCDM_PORTS_PER_STRM*s+2].user = '0; + end + end end - mem_to_banks #( - .AddrWidth(AXI_ADDR_WIDTH), - .DataWidth(AXI_DATA_WIDTH), - .NumBanks (32'd2), - .HideStrb (1'b1), - .MaxTrans (32'd1), - .FifoDepth(32'd1) - ) i_mem_to_banks_write ( - .clk_i, - .rst_ni, - .req_i(obi_write_req_from_rrc[s].req), - .gnt_o(obi_write_rsp_to_rrc[s].gnt), - .addr_i(obi_write_req_from_rrc[s].a.addr), - .wdata_i(obi_write_req_from_rrc[s].a.wdata), - .strb_i(obi_write_req_from_rrc[s].a.be), - .atop_i('0), - .we_i(obi_write_req_from_rrc[s].a.we), - .rvalid_o(obi_write_rsp_to_rrc[s].rvalid), - .rdata_o(obi_write_rsp_to_rrc[s].r.rdata), - .bank_req_o({ - tcdm_master[NB_TCDM_PORTS_PER_STRM*s+1].req, tcdm_master[NB_TCDM_PORTS_PER_STRM*s].req - }), - .bank_gnt_i({ - tcdm_master[NB_TCDM_PORTS_PER_STRM*s+1].gnt, tcdm_master[NB_TCDM_PORTS_PER_STRM*s].gnt - }), 
- .bank_addr_o({ - tcdm_master[NB_TCDM_PORTS_PER_STRM*s+1].add, tcdm_master[NB_TCDM_PORTS_PER_STRM*s].add - }), - .bank_wdata_o({ - tcdm_master[NB_TCDM_PORTS_PER_STRM*s+1].data, tcdm_master[NB_TCDM_PORTS_PER_STRM*s].data - }), - .bank_strb_o({ - tcdm_master[NB_TCDM_PORTS_PER_STRM*s+1].be, tcdm_master[NB_TCDM_PORTS_PER_STRM*s].be - }), - .bank_atop_o( /* NOT CONNECTED */), - .bank_we_o({tcdm_master_we_1, tcdm_master_we_0}), - .bank_rvalid_i({ - tcdm_master[NB_TCDM_PORTS_PER_STRM*s+1].r_valid, - tcdm_master[NB_TCDM_PORTS_PER_STRM*s].r_valid - }), - .bank_rdata_i({ - tcdm_master[NB_TCDM_PORTS_PER_STRM*s+1].r_data, tcdm_master[NB_TCDM_PORTS_PER_STRM*s].r_data - }) - ); - - assign tcdm_master[NB_TCDM_PORTS_PER_STRM*s+0].user = '0; - assign tcdm_master[NB_TCDM_PORTS_PER_STRM*s+1].user = '0; - assign tcdm_master[NB_TCDM_PORTS_PER_STRM*s+0].wen = !tcdm_master_we_0; - assign tcdm_master[NB_TCDM_PORTS_PER_STRM*s+1].wen = !tcdm_master_we_1; - - end endmodule -`undef MY_MAX +`undef MY_MAX \ No newline at end of file diff --git a/rtl/pulp_cluster.sv b/rtl/pulp_cluster.sv index 42894248..a17c9eb3 100644 --- a/rtl/pulp_cluster.sv +++ b/rtl/pulp_cluster.sv @@ -23,6 +23,7 @@ `include "pulp_interfaces.sv" `include "register_interface/typedef.svh" + module pulp_cluster import pulp_cluster_package::*; import hci_package::*; @@ -341,6 +342,16 @@ logic [Cfg.NumCores-1:0] hmr_barrier_matched; logic [Cfg.NumCores-1:0] hmr_dmr_sw_resynch_req, hmr_tmr_sw_resynch_req; logic [Cfg.NumCores-1:0] hmr_dmr_sw_synch_req, hmr_tmr_sw_synch_req; +// number of log interconnect ports per DMA HCI port - i.e., how many times the +// DMA ports are wider than the intc ports +// DMA ports are currently muxed together with HWPE ports to a single port. +// Thus they don't contribute to the ID width +// TODO Arpan correct this if needed +localparam DMA_IW_CONTRIB_FAC = Cfg.DmaUseHwpePort ? 0 : 1; +// data width of the TCDM master ports coming from the DMA. 
+// if using MCHAN, must be 32 +localparam int unsigned DMA_HCI_DATA_WIDTH = Cfg.DmaUseHwpePort ? Cfg.AxiDataOutWideWidth : DataWidth; + localparam hci_package::hci_size_parameter_t HciCoreSizeParam = '{ DW: DataWidth, AW: AddrWidth, @@ -359,6 +370,16 @@ localparam hci_package::hci_size_parameter_t HciHwpeSizeParam = '{ EW: (Cfg.ECCInterco) ? HWPEParityWidth : DEFAULT_EW, EHW: DEFAULT_EHW }; +localparam hci_package::hci_size_parameter_t HciDmaSizeParam = '{ + DW: DMA_HCI_DATA_WIDTH, + AW: AddrWidth, + BW: DEFAULT_BW, + UW: DEFAULT_UW, + IW: DEFAULT_IW, + EW: DEFAULT_EW, + EHW: DEFAULT_EHW +}; + /* logarithmic and peripheral interconnect interfaces */ // ext -> log interconnect hci_core_intf #( @@ -374,14 +395,13 @@ XBAR_PERIPH_BUS s_xbar_speriph_bus[Cfg.NumSlvPeriphs-1:0](); // periph interconnect -> HWPE subsystem XBAR_PERIPH_BUS s_hwpe_cfg_bus(); -// DMA -> log interconnect +// DMA -> (optionally) size converter hci_core_intf #( - .DW ( HciCoreSizeParam.DW ), - .AW ( HciCoreSizeParam.AW ) + .DW ( HciDmaSizeParam.DW ), + .AW ( HciDmaSizeParam.AW ) ) s_hci_dma[0:Cfg.DmaNumPlugs-1] ( .clk ( clk_i ) ); -XBAR_TCDM_BUS s_dma_plugin_xbar_bus[Cfg.DmaNumPlugs-1:0](); // ext -> xbar periphs FIXME XBAR_TCDM_BUS s_mperiph_xbar_bus[Cfg.NumMstPeriphs-1:0](); @@ -462,7 +482,11 @@ snitch_icache_pkg::icache_l0_events_t [Cfg.NumCores-1:0] s_icache_l0_events; snitch_icache_pkg::icache_l1_events_t s_icache_l1_events; //----------------------------------------------------------------------// -localparam TCDM_ID_WIDTH = Cfg.NumCores + Cfg.DmaNumPlugs + 4 + Cfg.HwpeNumPorts; +// DMA ports do not need ID extension if mapped to HWPE ports as they are +// currently muxed +// TODO Arpan fix if needed +localparam TCDM_ID_WIDTH = Cfg.NumCores + Cfg.DmaNumPlugs*DMA_IW_CONTRIB_FAC + 4 + Cfg.HwpeNumPorts; + localparam hci_package::hci_size_parameter_t HciMemSizeParam = '{ DW: DataWidth, AW: AddrMemWidth+2, // AddrMemWidth is word-wise, +2 for byte-wise @@ -709,8 +733,9 @@ per2axi_wrap #( 
cluster_interconnect_wrap #( .NB_CORES ( Cfg.NumCores ), - .HWPE_PRESENT ( Cfg.HwpePresent ), - .NB_HWPE_PORTS ( Cfg.HwpeNumPorts ), + .NB_HWPE ( Cfg.HwpePresent ), + .HWPE_WIDTH_FAC ( Cfg.HwpeNumPorts ), + .DMA_USE_HWPE_PORT ( Cfg.DmaUseHwpePort ), .NB_DMAS ( Cfg.DmaNumPlugs ), .NB_MPERIPHS ( Cfg.NumMstPeriphs ), .NB_TCDM_BANKS ( Cfg.TcdmNumBank ), @@ -732,6 +757,7 @@ cluster_interconnect_wrap #( .USE_ECC_INTERCONNECT ( Cfg.EnableECC && Cfg.ECCInterco ), .HCI_CORE_SIZE ( HciCoreSizeParam ), .HCI_HWPE_SIZE ( HciHwpeSizeParam ), + .HCI_DMA_SIZE ( HciDmaSizeParam ), .HCI_MEM_SIZE ( HciMemSizeParam ) ) cluster_interconnect_wrap_i ( @@ -781,6 +807,7 @@ dmac_wrap #( .NUM_BIDIR_STREAMS ( 1 ), .GLOBAL_QUEUE_DEPTH ( 2 ), .MUX_READ ( 1'b1 ), + .TCDM_MEM2BANKS ( !Cfg.DmaUseHwpePort ) `endif ) dmac_wrap_i ( .clk_i ( clk_i ), @@ -1929,4 +1956,26 @@ edge_propagator_tx ep_dma_pe_irq_i ( .valid_o ( dma_pe_irq_valid_o ) ); +// pragma translate_off +`ifndef VERILATOR +initial begin : p_assert + `ifdef TARGET_MCHAN + assert(DMA_HCI_DATA_WIDTH == 32) + else $fatal(1, "When using MCHAN, DMA_HCI_DATA_WIDTH must be 32!"); + assert(Cfg.DmaNumPlugs == 4) + else $fatal(1, "When using MCHAN, Cfg.DmaNumPlugs must be 4!"); + assert(!Cfg.DmaUseHwpePort) + else $fatal(1, "When using MCHAN, Cfg.DmaUseHwpePort must be 0!"); + `else + if (!Cfg.DmaUseHwpePort) begin + // The DMA can have wide access to TCDM only when sharing the master port to HCI with the HWPE + assert(DMA_HCI_DATA_WIDTH == DataWidth) + else $fatal(1, "When Cfg.DmaUseHwpePort is 0, DMA_HCI_DATA_WIDTH must be equal to DataWidth!"); + end + `endif +end +`endif +// pragma translate_on + + endmodule diff --git a/tb/pulp_cluster_tb.sv b/tb/pulp_cluster_tb.sv index 3f9438f8..e69639c2 100644 --- a/tb/pulp_cluster_tb.sv +++ b/tb/pulp_cluster_tb.sv @@ -56,7 +56,7 @@ module pulp_cluster_tb; localparam AxiWideByteOffset = $clog2(AxiWideBeWidth); localparam AxiUw = 10; - localparam DmaAxiDw = 64; + localparam DmaAxiDw = 256; localparam 
DmaAxiIw = 1; localparam bit[AxiAw-1:0] ClustBase = 'h10000000; @@ -381,6 +381,7 @@ module pulp_cluster_tb; DmaNumPlugs: `NB_DMAS, DmaNumOutstandingBursts: 8, DmaBurstLength: 256, + DmaUseHwpePort: 1, NumMstPeriphs: `NB_MPERIPHS, NumSlvPeriphs: `NB_SPERIPHS, ClusterAlias: 1, @@ -390,7 +391,7 @@ module pulp_cluster_tb; TcdmSize: 128*1024, TcdmNumBank: 16, HwpePresent: 1, - HwpeCfg: '{NumHwpes: 3, HwpeList: {SOFTEX, NEUREKA, REDMULE}}, + HwpeCfg: '{NumHwpes: 1, HwpeList: {NEUREKA}}, HwpeNumPorts: 9, HMRPresent: 1, HMRDmrEnabled: 1, From ad9b8f7e764e4a785778e6d0538bfd8435ab0589 Mon Sep 17 00:00:00 2001 From: Sergio Mazzola Date: Thu, 13 Feb 2025 12:54:20 +0100 Subject: [PATCH 07/67] tb: handle non-word (64-bit bursts) aligned sections The linker script has L1 address ORIGIN set to 0x10000004 even though in hardware it is set to 0x10000000. However, the testbench assumes 64b alignment to initialize the L1. Thus, the data was shifted by 32b in the simulation. While the AXI bursts are set to 64b, the misalignment needs to be handled coming from the linker script.
--- tb/pulp_cluster_tb.sv | 18 ++++++++++++------ 1 file changed, 12 insertions(+), 6 deletions(-) diff --git a/tb/pulp_cluster_tb.sv b/tb/pulp_cluster_tb.sv index e69639c2..549efd48 100644 --- a/tb/pulp_cluster_tb.sv +++ b/tb/pulp_cluster_tb.sv @@ -537,22 +537,28 @@ module pulp_cluster_tb; input string binary; // File name addr_t section_addr, section_len; byte buffer[]; + addr_t section_addr_offset; // Read ELF void'(read_elf(binary)); $display("[TB] Reading %s", binary); while (get_section(section_addr, section_len)) begin // Read Sections - automatic int num_words = (section_len + AxiWideBeWidth - 1)/AxiWideBeWidth; - $display("[TB] Reading section %x with %0d words", section_addr, num_words); + automatic int num_start_unaligned_bytes = section_addr%AxiWideBeWidth; + automatic int num_wide_words = (num_start_unaligned_bytes + section_len + AxiWideBeWidth - 1)/AxiWideBeWidth; - sections[section_addr >> AxiWideByteOffset] = num_words; - buffer = new[num_words * AxiWideBeWidth]; + $display("[TB] Reading section %x with %0d words", section_addr, num_wide_words); + + sections[section_addr >> AxiWideByteOffset] = num_wide_words; + buffer = new[num_wide_words * AxiWideBeWidth]; void'(read_section(section_addr, buffer, section_len)); - for (int i = 0; i < num_words; i++) begin + for (int i = 0; i < num_wide_words; i++) begin automatic logic [AxiWideBeWidth-1:0][7:0] word = '0; for (int j = 0; j < AxiWideBeWidth; j++) begin - word[j] = buffer[i * AxiWideBeWidth + j]; + automatic int index = i * AxiWideBeWidth + j - num_start_unaligned_bytes; + if(index >= 0) begin + word[j] = buffer[index]; + end end memory[section_addr/AxiWideBeWidth + i] = word; end From a876d779928c522752a9f410ffc38e7ba42ff694 Mon Sep 17 00:00:00 2001 From: Sergio Mazzola Date: Tue, 18 Feb 2025 13:40:29 +0100 Subject: [PATCH 08/67] hw: Parametrize number of EXT ports to TCDM --- include/pulp_soc_defines.sv | 1 + rtl/cluster_interconnect_wrap.sv | 9 +++++---- rtl/pulp_cluster.sv | 7 ++++--- 3 files 
changed, 10 insertions(+), 7 deletions(-) diff --git a/include/pulp_soc_defines.sv b/include/pulp_soc_defines.sv index 44eacd4f..f9602f1c 100644 --- a/include/pulp_soc_defines.sv +++ b/include/pulp_soc_defines.sv @@ -49,5 +49,6 @@ `define NB_CLUSTERS 1 `define NB_CORES 8 `define NB_DMAS 2 +`define NB_EXT 4 `define NB_MPERIPHS 1 `define NB_SPERIPHS 12 diff --git a/rtl/cluster_interconnect_wrap.sv b/rtl/cluster_interconnect_wrap.sv index d7332a06..1d4eec04 100644 --- a/rtl/cluster_interconnect_wrap.sv +++ b/rtl/cluster_interconnect_wrap.sv @@ -15,6 +15,7 @@ */ `include "hci_helpers.svh" +`include "pulp_soc_defines.sv" import hci_package::*; @@ -62,7 +63,7 @@ module cluster_interconnect_wrap hci_core_intf.target core_tcdm_slave [0 : NB_CORES-1 ], hci_core_intf.target hwpe_tcdm_slave [0 : NB_HWPE-1 ], XBAR_PERIPH_BUS.Slave core_periph_slave [NB_CORES-1 : 0 ], - hci_core_intf.target ext_slave [0 : 3 ], + hci_core_intf.target ext_slave [0 : `NB_EXT-1 ], hci_core_intf.target dma_slave [0 : NB_DMAS-1 ], XBAR_TCDM_BUS.Slave mperiph_slave [NB_MPERIPHS-1 : 0 ], hci_core_intf.initiator tcdm_sram_master [0 : NB_TCDM_BANKS-1], @@ -138,7 +139,7 @@ module cluster_interconnect_wrap .N_HWPE ( N_HCI_HWPE_PORTS ), .N_CORE ( NB_CORES ), .N_DMA ( N_HCI_DMA_PORTS ), - .N_EXT ( 4 ), + .N_EXT ( `NB_EXT ), .N_MEM ( NB_TCDM_BANKS ), .IW ( TCDM_ID_WIDTH ), .TS_BIT ( TEST_SET_BIT ), @@ -170,7 +171,7 @@ module cluster_interconnect_wrap .N_HWPE ( N_HCI_HWPE_PORTS ), .N_CORE ( NB_CORES ), .N_DMA ( N_HCI_DMA_PORTS ), - .N_EXT ( 4 ), + .N_EXT ( `NB_EXT ), .N_MEM ( NB_TCDM_BANKS ), .IW ( TCDM_ID_WIDTH ), .TS_BIT ( TEST_SET_BIT ), @@ -242,7 +243,7 @@ module cluster_interconnect_wrap .N_HWPE ( 0 ), .N_CORE ( NB_CORES+HWPE_WIDTH_FAC ), .N_DMA ( NB_DMAS ), - .N_EXT ( 4 ), + .N_EXT ( `NB_EXT ), .N_MEM ( NB_TCDM_BANKS ), .IW ( TCDM_ID_WIDTH ), .AWC ( ADDR_WIDTH ), diff --git a/rtl/pulp_cluster.sv b/rtl/pulp_cluster.sv index a17c9eb3..ec763412 100644 --- a/rtl/pulp_cluster.sv +++ b/rtl/pulp_cluster.sv @@ 
-22,6 +22,7 @@ `include "cluster_bus_defines.sv" `include "pulp_interfaces.sv" `include "register_interface/typedef.svh" +`include "pulp_soc_defines.sv" module pulp_cluster @@ -385,7 +386,7 @@ localparam hci_package::hci_size_parameter_t HciDmaSizeParam = '{ hci_core_intf #( .DW ( HciCoreSizeParam.DW ), .AW ( HciCoreSizeParam.AW ) -) s_hci_ext[0:Cfg.DmaNumPlugs-1] ( +) s_hci_ext[0:`NB_EXT-1] ( .clk ( clk_i ) ); @@ -485,7 +486,7 @@ snitch_icache_pkg::icache_l1_events_t s_icache_l1_events; // DMA ports do not need ID extension if mapped to HWPE ports as they are // currently muxed // TODO Arpan fix if needed -localparam TCDM_ID_WIDTH = Cfg.NumCores + Cfg.DmaNumPlugs*DMA_IW_CONTRIB_FAC + 4 + Cfg.HwpeNumPorts; +localparam TCDM_ID_WIDTH = Cfg.NumCores + Cfg.DmaNumPlugs*DMA_IW_CONTRIB_FAC + `NB_EXT + Cfg.HwpeNumPorts; localparam hci_package::hci_size_parameter_t HciMemSizeParam = '{ DW: DataWidth, @@ -654,7 +655,7 @@ cluster_bus_wrap #( ); axi2mem_wrap #( - .NB_DMAS ( Cfg.DmaNumPlugs ), + .NB_DMAS ( `NB_EXT ), .AXI_ADDR_WIDTH ( Cfg.AxiAddrWidth ), .AXI_DATA_WIDTH ( Cfg.AxiDataOutWidth ), .AXI_USER_WIDTH ( Cfg.AxiUserWidth ), From f79c1d85e2b2c0c5cba2667565d0f284145da485 Mon Sep 17 00:00:00 2001 From: Sergio Mazzola Date: Fri, 14 Feb 2025 14:50:59 +0100 Subject: [PATCH 09/67] hw: Fix combinational loop in HCI --- rtl/cluster_interconnect_wrap.sv | 2 ++ 1 file changed, 2 insertions(+) diff --git a/rtl/cluster_interconnect_wrap.sv b/rtl/cluster_interconnect_wrap.sv index 1d4eec04..c9172627 100644 --- a/rtl/cluster_interconnect_wrap.sv +++ b/rtl/cluster_interconnect_wrap.sv @@ -143,6 +143,7 @@ module cluster_interconnect_wrap .N_MEM ( NB_TCDM_BANKS ), .IW ( TCDM_ID_WIDTH ), .TS_BIT ( TEST_SET_BIT ), + .EXPFIFO ( 2 ), //For an explanation of these macros refer to https://github.com/pulp-platform/hci/blob/v2.1.1/rtl/common/hci_helpers.svh .`HCI_SIZE_PARAM(cores) ( HCI_CORE_SIZE ), .`HCI_SIZE_PARAM(mems) ( HCI_MEM_SIZE ), @@ -175,6 +176,7 @@ module cluster_interconnect_wrap 
.N_MEM ( NB_TCDM_BANKS ), .IW ( TCDM_ID_WIDTH ), .TS_BIT ( TEST_SET_BIT ), + .EXPFIFO ( 2 ), .`HCI_SIZE_PARAM(cores) ( HCI_CORE_SIZE ), .`HCI_SIZE_PARAM(mems) ( HCI_MEM_SIZE ), .`HCI_SIZE_PARAM(hwpe) ( HCI_HWPE_SIZE ) From ec13fc0fda22f471b6e532d883ee9134548add33 Mon Sep 17 00:00:00 2001 From: Sergio Mazzola Date: Fri, 14 Feb 2025 14:52:35 +0100 Subject: [PATCH 10/67] tb: Disable ECC hardware --- tb/pulp_cluster_tb.sv | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tb/pulp_cluster_tb.sv b/tb/pulp_cluster_tb.sv index 549efd48..b2d7e80f 100644 --- a/tb/pulp_cluster_tb.sv +++ b/tb/pulp_cluster_tb.sv @@ -403,8 +403,8 @@ module pulp_cluster_tb; HMRSeparateDataVoters: 1, HMRSeparateAxiBus: 0, HMRNumBusVoters: 1, - EnableECC: 1, - ECCInterco: 1, + EnableECC: 0, + ECCInterco: 0, iCacheNumBanks: 2, iCacheNumLines: 1, iCacheNumWays: 4, From 2f5e74a47ca99b2c606d60f596d1f0cfb9a5b1af Mon Sep 17 00:00:00 2001 From: Sergio Mazzola Date: Thu, 20 Feb 2025 19:18:57 +0100 Subject: [PATCH 11/67] package: Change config struct to packed type Being unnecessarily unpacked, it was not compatible with other systems (like Cheshire) --- packages/pulp_cluster_package.sv | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/packages/pulp_cluster_package.sv b/packages/pulp_cluster_package.sv index 05a6e60d..f5bef1a0 100644 --- a/packages/pulp_cluster_package.sv +++ b/packages/pulp_cluster_package.sv @@ -41,13 +41,13 @@ package pulp_cluster_package; localparam int unsigned MAX_NUM_HWPES = 8; - typedef struct { + typedef struct packed { hwpe_type_e [MAX_NUM_HWPES-1:0] HwpeList; byte_t NumHwpes; } hwpe_subsystem_cfg_t; // PULP cluster configuration - typedef struct { + typedef struct packed { // Type of core in the cluster core_type_e CoreType; // Number of cores in the cluster From 0cfbed9e50a9b3c1e94f5be1923618a551403b13 Mon Sep 17 00:00:00 2001 From: Sergio Mazzola Date: Thu, 27 Feb 2025 17:06:32 +0100 Subject: [PATCH 12/67] bender: Add obi, bump idma, 
fix_redundancy_cells, update Bender.lock - Several sub-dependencies in the Bender.lock were not aligned with the Bender.yml - obi was in the Bender.lock but not in Bender.yml - redundancy_cells' version in Bender.yml was not supported here --- Bender.lock | 73 +++++++++++++++++++++++++++-------------------------- Bender.yml | 5 ++-- 2 files changed, 40 insertions(+), 38 deletions(-) diff --git a/Bender.lock b/Bender.lock index 91d2c3ab..98c63684 100644 --- a/Bender.lock +++ b/Bender.lock @@ -7,8 +7,8 @@ packages: dependencies: - common_cells axi: - revision: 853ede23b2a9837951b74dbdc6d18c3eef5bac7d - version: 0.39.5 + revision: 39f5f2d51c5e524f6fc5cf8b6e901f7dcc5622d7 + version: 0.39.6 source: Git: https://github.com/pulp-platform/axi.git dependencies: @@ -16,16 +16,16 @@ packages: - common_verification - tech_cells_generic axi2mem: - revision: b0e963433b2f6a61262b1448031e74eaec57c203 - version: null + revision: be0c696709acaee579787ba2432d26ad27640594 + version: 1.0.2 source: Git: https://github.com/pulp-platform/axi2mem.git dependencies: - axi_slice - common_cells axi2per: - revision: 4932bd2b88a1c7b5f0bf95411fc512905ed32439 - version: null + revision: e8ca052a745e184ca960933b2fe416b725e9ca81 + version: 1.0.2 source: Git: https://github.com/pulp-platform/axi2per.git dependencies: @@ -37,24 +37,23 @@ packages: Git: https://github.com/pulp-platform/axi_slice.git dependencies: - common_cells + axi_stream: + revision: 54891ff40455ca94a37641b9da4604647878cc07 + version: 0.1.1 + source: + Git: https://github.com/pulp-platform/axi_stream.git + dependencies: + - common_cells cluster_icache: - revision: dd0e8f3497903a9ca99fc9f349d5a4f688ceb3ae - version: null + revision: 64e21ae455bbdde850c4df13bef86ea55ac42537 + version: 0.2.0 source: Git: https://github.com/pulp-platform/cluster_icache.git dependencies: - axi - common_cells - - register_interface - scm - tech_cells_generic - axi_stream: - revision: 54891ff40455ca94a37641b9da4604647878cc07 - version: 0.1.1 - source: - 
Git: https://github.com/pulp-platform/axi_stream.git - dependencies: - - common_cells cluster_interconnect: revision: 1284def6c0b7f7e9355eb093d00883ad9dead1b7 version: null @@ -63,8 +62,8 @@ packages: dependencies: - common_cells cluster_peripherals: - revision: 0b8e8ab9e6be3a5030a18256bb7e75cf6b6f6cac - version: null + revision: e464eb9ddcc39e5a50009819601c4f213b1d4ba3 + version: 2.2.0 source: Git: https://github.com/pulp-platform/cluster_peripherals.git dependencies: @@ -78,13 +77,13 @@ packages: - common_verification - tech_cells_generic common_verification: - revision: 9c07fa860593b2caabd9b5681740c25fac04b878 - version: 0.2.3 + revision: fb1885f48ea46164a10568aeff51884389f67ae3 + version: 0.2.5 source: Git: https://github.com/pulp-platform/common_verification.git dependencies: [] cv32e40p: - revision: e863f576699815b38cc9d80dbdede8ed5efd5991 + revision: 1a93f340e9dadb9f7c8c471f27a40932c8b1c62e version: null source: Git: https://github.com/pulp-platform/cv32e40p.git @@ -124,9 +123,11 @@ packages: - common_cells - hwpe-stream - l2_tcdm_hybrid_interco + - redundancy_cells + - register_interface hier-icache: - revision: 2886cb2a46cea3e2bd2d979b505d88fadfbe150c - version: null + revision: 7243834d2407ca23cff583d57641c84b982bd9bc + version: 1.3.0 source: Git: https://github.com/pulp-platform/hier-icache.git dependencies: @@ -144,8 +145,8 @@ packages: dependencies: - tech_cells_generic hwpe-stream: - revision: 65c99a4a2f37a79acee800ab0151f67dfb1edef1 - version: 1.8.0 + revision: b3d33afdd27e79bcda1348d0ab5f4afd52c03106 + version: 1.9.0 source: Git: https://github.com/pulp-platform/hwpe-stream.git dependencies: @@ -164,10 +165,10 @@ packages: Git: https://github.com/pulp-platform/icache-intc.git dependencies: [] idma: - revision: 4b3393b033b15641b8669970462ff39f4e00d207 + revision: 92799c5fabcf2cd1f224f970bbb040b290d08fd5 version: null source: - Git: https://github.com/pulp-platform/iDMA.git + Git: https://github.com/pulp-platform/idma.git dependencies: - axi - 
axi_stream @@ -182,8 +183,8 @@ packages: Git: https://github.com/pulp-platform/L2_tcdm_hybrid_interco.git dependencies: [] mchan: - revision: 7f064f205a3e0203e959b14773c4afecf56681ab - version: null + revision: 3f2ae92f78e2ddbd0e079cbb4f81fcc248171c12 + version: 1.2.4 source: Git: https://github.com/pulp-platform/mchan.git dependencies: @@ -199,23 +200,23 @@ packages: - hwpe-stream - zeroriscy obi: - revision: 2ee698211db5c007bd0d207ebd646ce79f0472ec - version: null + revision: c2141a653c755461ff44f61d12aeb5d99fc8e760 + version: 0.1.3 source: Git: https://github.com/pulp-platform/obi.git dependencies: - common_cells - common_verification per2axi: - revision: 95bf23119b47fc171d9ed3734c431f71cffd9350 - version: null + revision: 18cf4f2ad51b73de0448843ce0def54ab5fb274b + version: 1.0.5 source: Git: https://github.com/pulp-platform/per2axi.git dependencies: - axi_slice redmule: revision: 9223ccc932e21d0667e9c2d30831db41eec9299e - version: + version: null source: Git: https://github.com/pulp-platform/redmule.git dependencies: @@ -228,7 +229,7 @@ packages: - register_interface - tech_cells_generic redundancy_cells: - revision: 49e714b97a19a7aaddf064ae2757c8f02d1f62dc + revision: 9e31f7c6c24877eaf58279903e7a162b16c9a721 version: null source: Git: https://github.com/pulp-platform/redundancy_cells.git @@ -263,7 +264,7 @@ packages: dependencies: - tech_cells_generic softex: - revision: 31e7534a3da3244f2f5c08bab430a560beee6ff2 + revision: 11dd29e85d40e29fea0481b471f1c0cc967df1a4 version: null source: Git: https://github.com/belanoa/softex.git diff --git a/Bender.yml b/Bender.yml index fe6856a1..d7f2ba22 100644 --- a/Bender.yml +++ b/Bender.yml @@ -18,7 +18,7 @@ dependencies: cluster_interconnect: { git: "https://github.com/pulp-platform/cluster_interconnect.git", rev: v1.3.0 } event_unit_flex: { git: "https://github.com/pulp-platform/event_unit_flex.git", rev: astral-v1.0 } mchan: { git: "https://github.com/pulp-platform/mchan.git", rev: v1.2.4 } - idma: { git: 
"https://github.com/pulp-platform/idma.git", rev: "4b3393b033b15641b8669970462ff39f4e00d207" } # branch: smazzola/chimera + idma: { git: "https://github.com/pulp-platform/idma.git", rev: "92799c5fabcf2cd1f224f970bbb040b290d08fd5" } # branch: smazzola/chimera hier-icache: { git: "https://github.com/pulp-platform/hier-icache.git", rev: v1.3.0 } cluster_icache: { git: "https://github.com/pulp-platform/cluster_icache.git", rev: v0.2.0 } cluster_peripherals: { git: "https://github.com/pulp-platform/cluster_peripherals.git", rev: v2.2.0 } @@ -32,10 +32,11 @@ dependencies: scm: { git: "https://github.com/pulp-platform/scm.git", rev: v1.2.0 } hci: { git: "https://github.com/pulp-platform/hci.git", rev: 2d5ae7877125a8b5415ae343f1c7c401d4596e15 } # branch: smazzola/chimera register_interface: { git: "https://github.com/pulp-platform/register_interface.git", version: 0.4.4 } - redundancy_cells: { git: "https://github.com/pulp-platform/redundancy_cells.git", rev: astral-v1.0 } + redundancy_cells: { git: "https://github.com/pulp-platform/redundancy_cells.git", rev: 9e31f7c6c24877eaf58279903e7a162b16c9a721 } # branch: astral-v0 redmule: { git: "https://github.com/pulp-platform/redmule.git", rev: astral-v1.0 } neureka: { git: "https://github.com/pulp-platform/neureka.git", rev: 8e0883bd0c19c6844122456c749c2bdfde18011f } softex: { git: "https://github.com/belanoa/softex.git" , rev: astral-v1.0 } + obi: { git: "https://github.com/pulp-platform/obi.git", rev: v0.1.3 } export_include_dirs: - include From 3e1bab8caa252b79a9c2de5f8b8abdd56bcfae34 Mon Sep 17 00:00:00 2001 From: Sergio Mazzola Date: Thu, 27 Feb 2025 17:06:59 +0100 Subject: [PATCH 13/67] hw: Fix idma_wrap for updated obi v0.1.3 --- rtl/idma_wrap.sv | 18 +++++++++++------- 1 file changed, 11 insertions(+), 7 deletions(-) diff --git a/rtl/idma_wrap.sv b/rtl/idma_wrap.sv index fe5cc8c0..dc34dadd 100644 --- a/rtl/idma_wrap.sv +++ b/rtl/idma_wrap.sv @@ -697,7 +697,7 @@ module dmac_wrap #( obi_rready_converter #( 
.obi_a_chan_t(obi_a_chan_t), .obi_r_chan_t(obi_r_chan_t), - .DEPTH(1) + .Depth(1) ) obi_rready_converter_reorg_i ( .clk_i, .rst_ni, @@ -710,17 +710,19 @@ module dmac_wrap #( .rvalid_o(obi_reorg_rsp_to_dma[s].rvalid), .mgr_a_chan_o(obi_reorg_req_from_rrc[s].a), .req_o(obi_reorg_req_from_rrc[s].req), - .rready_o(obi_reorg_req_from_rrc[s].rready), .mgr_r_chan_i(obi_reorg_rsp_to_rrc[s].r), .gnt_i(obi_reorg_rsp_to_rrc[s].gnt), .rvalid_i(obi_reorg_rsp_to_rrc[s].rvalid) ); + // We are always ready for responses, because we don't + // send more requests than we can absorb in the fifo + assign obi_reorg_req_from_rrc[s].rready = 1'b1; end // else: !if(MUX_READ) obi_rready_converter #( .obi_a_chan_t(obi_a_chan_t), .obi_r_chan_t(obi_r_chan_t), - .DEPTH(1) + .Depth(1) ) obi_rready_converter_read_i ( .clk_i, .rst_ni, @@ -733,18 +735,19 @@ module dmac_wrap #( .rvalid_o(obi_read_rsp_to_mux[s].rvalid), .mgr_a_chan_o(obi_read_req_from_rrc[s].a), .req_o(obi_read_req_from_rrc[s].req), - .rready_o(obi_read_req_from_rrc[s].rready), .mgr_r_chan_i(obi_read_rsp_to_rrc[s].r), .gnt_i(obi_read_rsp_to_rrc[s].gnt), .rvalid_i(obi_read_rsp_to_rrc[s].rvalid) ); - + // We are always ready for responses, because we don't + // send more requests than we can absorb in the fifo + assign obi_read_req_from_rrc[s].rready = 1'b1; obi_rready_converter #( .obi_a_chan_t(obi_a_chan_t), .obi_r_chan_t(obi_r_chan_t), - .DEPTH(1) + .Depth(1) ) obi_rready_converter_wr_i ( .clk_i, .rst_ni, @@ -757,11 +760,12 @@ module dmac_wrap #( .rvalid_o(obi_write_rsp_to_dma[s].rvalid), .mgr_a_chan_o(obi_write_req_from_rrc[s].a), .req_o(obi_write_req_from_rrc[s].req), - .rready_o(obi_write_req_from_rrc[s].rready), .mgr_r_chan_i(obi_write_rsp_to_rrc[s].r), .gnt_i(obi_write_rsp_to_rrc[s].gnt), .rvalid_i(obi_write_rsp_to_rrc[s].rvalid) ); + // Same as above + assign obi_write_req_from_rrc[s].rready = 1'b1; end From 00d183e23a82e794145113683ad67cf579536015 Mon Sep 17 00:00:00 2001 From: Sergio Mazzola Date: Thu, 27 Feb 2025 18:27:48 
+0100 Subject: [PATCH 14/67] makefile: Add target to generate iDMA hardware --- .gitignore | 1 + Makefile | 31 +++++++++++++++++++++++++++++-- 2 files changed, 30 insertions(+), 2 deletions(-) diff --git a/.gitignore b/.gitignore index 6551a7a6..bb15cae9 100644 --- a/.gitignore +++ b/.gitignore @@ -6,3 +6,4 @@ scripts/compile.tcl regression_tests/ pulp-runtime/ fault_injection_sim/ +venv/ diff --git a/Makefile b/Makefile index 3bd44e19..cf284c90 100644 --- a/Makefile +++ b/Makefile @@ -13,6 +13,7 @@ QUESTA ?= endif BENDER ?= bender +PYTHON ?= python3 VSIM ?= $(QUESTA) vsim VOPT ?= $(QUESTA) vopt @@ -98,13 +99,39 @@ regression_tests: cd $@ && git checkout $(REGRESSION_TESTS_COMMIT) cd $@ && git submodule update --init --recursive +######################### +# Hardware dependencies # +######################### + +# Set dependency paths only if dependencies have already been cloned +# This avoids running `bender checkout` at every make command +ifeq ($(shell test -d $(ROOT_DIR)/.bender || echo 1),) +IDMA_ROOT := $(shell $(BENDER) path idma) +endif + +# Fall back to safe defaults if dependencies are not cloned yet +IDMA_ROOT ?= . + +# Python requirements (version and packages) coming from iDMA repository +gen_idma_hw: + @$(PYTHON) --version >/dev/null 2>&1 || { echo "ERROR: Python not found. Python 3.8 or higher is required."; exit 1; } && \ + $(PYTHON) -c "import sys; assert sys.version_info >= (3, 8)" || { echo "ERROR: Python version must be 3.8 or higher"; exit 1; } && \ + $(PYTHON) -m venv venv && \ + . 
venv/bin/activate && \ + pip install --upgrade pip && \ + pip install -r $(IDMA_ROOT)/requirements.txt && \ + make -C $(IDMA_ROOT) idma_hw_all + +clean_idma_hw: + make -C $(IDMA_ROOT) idma_clean_all + ######################## # Build and simulation # ######################## .PHONY: sim-clean compile build run -sim-clean: +sim-clean: clean_idma_hw rm -rf scripts/compile.tcl rm -rf work @@ -121,7 +148,7 @@ scripts/synth-compile.tcl: | Bender.lock $(library): $(QUESTA) vlib $(library) -compile: $(library) +compile: gen_idma_hw $(library) @test -f Bender.lock || { echo "ERROR: Bender.lock file does not exist. Did you run make checkout in bender mode?"; exit 1; } @test -f scripts/compile.tcl || { echo "ERROR: scripts/compile.tcl file does not exist. Did you run make scripts in bender mode?"; exit 1; } $(VSIM) -c -do 'quit -code [source scripts/compile.tcl]' From c7ea8092310c5d6d612063ab79efea0204664c79 Mon Sep 17 00:00:00 2001 From: Sergio Mazzola Date: Thu, 6 Mar 2025 18:42:11 +0100 Subject: [PATCH 15/67] hw: :bug: Fix cluster_id_i tied to 0 in cluster_interco and core_demux It created problems in routing of requests through peripheral interconnect --- rtl/pulp_cluster.sv | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/rtl/pulp_cluster.sv b/rtl/pulp_cluster.sv index ec763412..6880ff93 100644 --- a/rtl/pulp_cluster.sv +++ b/rtl/pulp_cluster.sv @@ -764,7 +764,7 @@ cluster_interconnect_wrap #( ) cluster_interconnect_wrap_i ( .clk_i ( clk_i ), .rst_ni ( rst_ni ), - .cluster_id_i ( '0 ), + .cluster_id_i ( cluster_id_i ), .hci_ecc_periph_slave ( s_periph_hwpe_hci_ecc_bus ), @@ -1111,7 +1111,7 @@ generate .test_en_i ( test_mode_i ), .clk_en_i ( clk_core_en[i] ), .base_addr_i ( base_addr_i ), - .cluster_id_i ( '0 ), + .cluster_id_i ( cluster_id_i ), .ext_perf_o ( ext_perf[i] ), .core_data_req_i ( demux_data_req[i] ), .core_data_rsp_o ( demux_data_rsp[i] ), From 07ca6db7ff2010bdf5898d0f7c5fb998b11a663c Mon Sep 17 00:00:00 2001 From: Sergio Mazzola Date: 
Tue, 11 Mar 2025 12:41:40 +0100 Subject: [PATCH 16/67] bender,hw: Bump hci to align ECC and non-ECC HCI interfaces --- Bender.lock | 2 +- Bender.yml | 2 +- rtl/cluster_interconnect_wrap.sv | 13 +------------ 3 files changed, 3 insertions(+), 14 deletions(-) diff --git a/Bender.lock b/Bender.lock index 98c63684..e5a53e84 100644 --- a/Bender.lock +++ b/Bender.lock @@ -114,7 +114,7 @@ packages: dependencies: - common_cells hci: - revision: 2d5ae7877125a8b5415ae343f1c7c401d4596e15 + revision: 4257a73b391731d94077ad72fc528c27f5b11392 version: null source: Git: https://github.com/pulp-platform/hci.git diff --git a/Bender.yml b/Bender.yml index d7f2ba22..8db752db 100644 --- a/Bender.yml +++ b/Bender.yml @@ -30,7 +30,7 @@ dependencies: cv32e40p: { git: "https://github.com/pulp-platform/cv32e40p.git", rev: astral-v1.0 } ibex: { git: "https://github.com/pulp-platform/ibex.git", rev: "pulpissimo-v6.1.2" } scm: { git: "https://github.com/pulp-platform/scm.git", rev: v1.2.0 } - hci: { git: "https://github.com/pulp-platform/hci.git", rev: 2d5ae7877125a8b5415ae343f1c7c401d4596e15 } # branch: smazzola/chimera + hci: { git: "https://github.com/pulp-platform/hci.git", rev: 4257a73b391731d94077ad72fc528c27f5b11392 } # branch: smazzola/chimera register_interface: { git: "https://github.com/pulp-platform/register_interface.git", version: 0.4.4 } redundancy_cells: { git: "https://github.com/pulp-platform/redundancy_cells.git", rev: 9e31f7c6c24877eaf58279903e7a162b16c9a721 } # branch: astral-v0 redmule: { git: "https://github.com/pulp-platform/redmule.git", rev: astral-v1.0 } diff --git a/rtl/cluster_interconnect_wrap.sv b/rtl/cluster_interconnect_wrap.sv index c9172627..6a86226d 100644 --- a/rtl/cluster_interconnect_wrap.sv +++ b/rtl/cluster_interconnect_wrap.sv @@ -162,7 +162,7 @@ module cluster_interconnect_wrap .ctrl_i ( hci_ctrl_i ), .periph_hci_ecc ( hci_ecc_periph_slave ), .cores ( core_tcdm_slave ), - .hwpe ( s_hwpe_intc[0] ), + .hwpe ( s_hwpe_intc ), .dma ( s_dma_intc ), .ext ( 
ext_slave ), .mems ( tcdm_sram_master ) @@ -308,15 +308,4 @@ module cluster_interconnect_wrap .mperiph_slave ( mperiph_slave ) ); - // pragma translate_off - `ifndef VERILATOR - initial begin : p_assert - if (N_HCI_HWPE_PORTS > 1) begin - assert(!USE_ECC_INTERCONNECT) - else $fatal(1, "If USE_ECC_INTERCONNECT is enabled, N_HCI_HWPE_PORTS cannot be more than 1."); - end - end - `endif - // pragma translate_on - endmodule From 5730f62676091f345004385a3ce3f1ee35f2b9de Mon Sep 17 00:00:00 2001 From: Sergio Mazzola Date: Tue, 11 Feb 2025 17:53:44 +0100 Subject: [PATCH 17/67] makefile: Bump pulp-runtime, regression_tests, and nonfree revs --- Makefile | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/Makefile b/Makefile index cf284c90..4334cf22 100644 --- a/Makefile +++ b/Makefile @@ -38,7 +38,7 @@ endef ###################### NONFREE_REMOTE ?= git@iis-git.ee.ethz.ch:pulp-restricted/pulp-cluster-nonfree.git -NONFREE_COMMIT ?= 67079fe +NONFREE_COMMIT ?= 4267271dca54a6c6592336919034ee6e5573974a # branch: smazzola/chimera nonfree-init: git clone $(NONFREE_REMOTE) nonfree @@ -76,7 +76,7 @@ sw-clean: ## Clone pulp-runtime as SW stack PULP_RUNTIME_REMOTE ?= https://github.com/pulp-platform/pulp-runtime.git -PULP_RUNTIME_COMMIT ?= b3c239c # branch: lg/upstream +PULP_RUNTIME_COMMIT ?= 749c08e2036ddcb7baaa956c09a556f8b386b66f # branch: smazzola/chimera pulp-runtime: git clone $(PULP_RUNTIME_REMOTE) $@ @@ -92,7 +92,7 @@ fault_injection_sim: ## Clone regression tests REGRESSION_TESTS_REMOTE ?= https://github.com/pulp-platform/regression_tests.git -REGRESSION_TESTS_COMMIT ?= d43cb0d # branch: lg/upstream +REGRESSION_TESTS_COMMIT ?= 53e038baec991aa94e113ecefc03ca6377e56f85 # branch: smazzola/chimera regression_tests: git clone $(REGRESSION_TESTS_REMOTE) $@ From a3ac2c28b9ba3e56e4fe25b4babadf4e99874e84 Mon Sep 17 00:00:00 2001 From: Sergio Mazzola Date: Fri, 11 Jul 2025 13:09:57 +0200 Subject: [PATCH 18/67] ci: Disable ECC --- Makefile | 2 +- 1 file changed, 1 
insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 4334cf22..79ac1ae3 100644 --- a/Makefile +++ b/Makefile @@ -38,7 +38,7 @@ endef ###################### NONFREE_REMOTE ?= git@iis-git.ee.ethz.ch:pulp-restricted/pulp-cluster-nonfree.git -NONFREE_COMMIT ?= 4267271dca54a6c6592336919034ee6e5573974a # branch: smazzola/chimera +NONFREE_COMMIT ?= f492530603be007bdc82c37bc0ebae6599d7c7a2 # branch: smazzola/chimera nonfree-init: git clone $(NONFREE_REMOTE) nonfree From 32e5a07ce11302cc2f18a98ca317e12b200dcb96 Mon Sep 17 00:00:00 2001 From: Georg Rutishauser Date: Fri, 7 Jun 2024 16:19:31 +0200 Subject: [PATCH 19/67] fix printing from all cores --- tb/mock_uart.sv | 2 +- tb/mock_uart_axi.sv | 52 ++++++++++++++++++++++----------------------- 2 files changed, 27 insertions(+), 27 deletions(-) diff --git a/tb/mock_uart.sv b/tb/mock_uart.sv index 9428b6c6..9714dab1 100644 --- a/tb/mock_uart.sv +++ b/tb/mock_uart.sv @@ -60,7 +60,7 @@ module mock_uart #( function void uart_tx(byte ch); if(ch==8'h0A) begin - $display("[TB UART %2d] %s", UART_IDX, stringa); + $display("[TB UART %2d] %s", UART_IDX, stringa); charnum = 0; stringa = '0; end else begin diff --git a/tb/mock_uart_axi.sv b/tb/mock_uart_axi.sv index 5c580796..112b13f9 100644 --- a/tb/mock_uart_axi.sv +++ b/tb/mock_uart_axi.sv @@ -19,6 +19,7 @@ module mock_uart_axi #( parameter int unsigned AxiUw = 0, parameter logic [AxiAw-1:0] BaseAddr = 0, parameter int unsigned N_CORES = 8 + )( input logic clk_i, input logic rst_ni, @@ -26,14 +27,14 @@ module mock_uart_axi #( AXI_BUS.Slave uart ); - logic uart_penable; - logic uart_pwrite; - logic [AxiAw-1:0] uart_paddr; - logic [N_CORES-1:0] uart_psel; - logic [31:0] uart_pwdata; - logic [N_CORES-1:0][31:0] uart_prdata ; - logic [N_CORES-1:0] uart_pready; - logic [N_CORES-1:0] uart_pslverr; + logic uart_penable; + logic uart_pwrite; + logic [AxiAw-1:0] uart_paddr; + logic [N_CORES-1:0] uart_psel; + logic [31:0] uart_pwdata; + logic [N_CORES-1:0] [31:0] uart_prdata; + 
logic [N_CORES-1:0] uart_pready; + logic [N_CORES-1:0] uart_pslverr; AXI_LITE #( .AXI_DATA_WIDTH(AxiDw), @@ -81,9 +82,9 @@ module mock_uart_axi #( rule_t [0:N_CORES-1] rule; // each mock UART only has 2 words of address space - for (genvar g = 0; g < N_CORES; g++) begin: gen_rule + for (genvar g=0; g Date: Tue, 3 Jun 2025 12:18:42 +0200 Subject: [PATCH 24/67] [wip] bump versions --- .gitignore | 1 + Bender.lock | 20 ++++++++++---------- Bender.yml | 4 ++-- Makefile | 12 ++++++++++-- 4 files changed, 23 insertions(+), 14 deletions(-) diff --git a/.gitignore b/.gitignore index bb15cae9..a2dc6bf9 100644 --- a/.gitignore +++ b/.gitignore @@ -7,3 +7,4 @@ regression_tests/ pulp-runtime/ fault_injection_sim/ venv/ +.venv/ diff --git a/Bender.lock b/Bender.lock index e5a53e84..956f072c 100644 --- a/Bender.lock +++ b/Bender.lock @@ -7,8 +7,8 @@ packages: dependencies: - common_cells axi: - revision: 39f5f2d51c5e524f6fc5cf8b6e901f7dcc5622d7 - version: 0.39.6 + revision: f07498d53ecd5518b277c7d213ec3b71ca4df93c + version: 0.39.7 source: Git: https://github.com/pulp-platform/axi.git dependencies: @@ -55,8 +55,8 @@ packages: - scm - tech_cells_generic cluster_interconnect: - revision: 1284def6c0b7f7e9355eb093d00883ad9dead1b7 - version: null + revision: 2967d8d17be0a6139229ca8d3d4956e182aec3de + version: 1.3.0 source: Git: https://github.com/pulp-platform/cluster_interconnect.git dependencies: @@ -69,8 +69,8 @@ packages: dependencies: - hci common_cells: - revision: c27bce39ebb2e6bae52f60960814a2afca7bd4cb - version: 1.37.0 + revision: 9afda9abb565971649c2aa0985639c096f351171 + version: 1.38.0 source: Git: https://github.com/pulp-platform/common_cells.git dependencies: @@ -165,8 +165,8 @@ packages: Git: https://github.com/pulp-platform/icache-intc.git dependencies: [] idma: - revision: 92799c5fabcf2cd1f224f970bbb040b290d08fd5 - version: null + revision: ff5d56fffb3767814db88d6bf8f381974ea33aa5 + version: 0.6.4 source: Git: https://github.com/pulp-platform/idma.git dependencies: 
@@ -257,8 +257,8 @@ packages: - fpnew - tech_cells_generic scm: - revision: 74426dee36f28ae1c02f7635cf844a0156145320 - version: null + revision: 472f99affe44ff7b282b519c047a3cfeb35b16c6 + version: 1.2.0 source: Git: https://github.com/pulp-platform/scm.git dependencies: diff --git a/Bender.yml b/Bender.yml index 8db752db..647c891c 100644 --- a/Bender.yml +++ b/Bender.yml @@ -18,7 +18,7 @@ dependencies: cluster_interconnect: { git: "https://github.com/pulp-platform/cluster_interconnect.git", rev: v1.3.0 } event_unit_flex: { git: "https://github.com/pulp-platform/event_unit_flex.git", rev: astral-v1.0 } mchan: { git: "https://github.com/pulp-platform/mchan.git", rev: v1.2.4 } - idma: { git: "https://github.com/pulp-platform/idma.git", rev: "92799c5fabcf2cd1f224f970bbb040b290d08fd5" } # branch: smazzola/chimera + idma: { git: "https://github.com/pulp-platform/idma.git", rev: v0.6.4 } hier-icache: { git: "https://github.com/pulp-platform/hier-icache.git", rev: v1.3.0 } cluster_icache: { git: "https://github.com/pulp-platform/cluster_icache.git", rev: v0.2.0 } cluster_peripherals: { git: "https://github.com/pulp-platform/cluster_peripherals.git", rev: v2.2.0 } @@ -31,7 +31,7 @@ dependencies: ibex: { git: "https://github.com/pulp-platform/ibex.git", rev: "pulpissimo-v6.1.2" } scm: { git: "https://github.com/pulp-platform/scm.git", rev: v1.2.0 } hci: { git: "https://github.com/pulp-platform/hci.git", rev: 4257a73b391731d94077ad72fc528c27f5b11392 } # branch: smazzola/chimera - register_interface: { git: "https://github.com/pulp-platform/register_interface.git", version: 0.4.4 } + register_interface: { git: "https://github.com/pulp-platform/register_interface.git", version: 0.4.5 } redundancy_cells: { git: "https://github.com/pulp-platform/redundancy_cells.git", rev: 9e31f7c6c24877eaf58279903e7a162b16c9a721 } # branch: astral-v0 redmule: { git: "https://github.com/pulp-platform/redmule.git", rev: astral-v1.0 } neureka: { git: "https://github.com/pulp-platform/neureka.git", 
rev: 8e0883bd0c19c6844122456c749c2bdfde18011f } diff --git a/Makefile b/Makefile index 79ac1ae3..b5e8e7a6 100644 --- a/Makefile +++ b/Makefile @@ -76,7 +76,7 @@ sw-clean: ## Clone pulp-runtime as SW stack PULP_RUNTIME_REMOTE ?= https://github.com/pulp-platform/pulp-runtime.git -PULP_RUNTIME_COMMIT ?= 749c08e2036ddcb7baaa956c09a556f8b386b66f # branch: smazzola/chimera +PULP_RUNTIME_COMMIT ?= 8000372999b5933317c357367b9d2f445a3cbfd0 # branch: dkeller/chimera-v2 pulp-runtime: git clone $(PULP_RUNTIME_REMOTE) $@ @@ -92,7 +92,7 @@ fault_injection_sim: ## Clone regression tests REGRESSION_TESTS_REMOTE ?= https://github.com/pulp-platform/regression_tests.git -REGRESSION_TESTS_COMMIT ?= 53e038baec991aa94e113ecefc03ca6377e56f85 # branch: smazzola/chimera +REGRESSION_TESTS_COMMIT ?= 9ead1c0f6197e79d731749789f381ad83c7d81a2 # branch: dkeller/chimera-v2 regression_tests: git clone $(REGRESSION_TESTS_REMOTE) $@ @@ -160,6 +160,14 @@ run: $(VSIM) +permissive -suppress 3053 -suppress 8885 -lib $(library) +MAX_CYCLES=$(max_cycles) +UVM_TESTNAME=$(test_case) +APP=$(elf-bin) +notimingchecks +nospecify -t 1ps \ ${top_level}_optimized +permissive-off ++$(elf-bin) ++$(target-options) ++$(cl-bin) | tee sim.log +.PHONY: clean + +clean: + rm -rf Bender.lock + rm -rf scripts/synth-compile.tcl + rm -rf scripts/compile.tcl + rm -rf transcript + rm -rf *.log #################### # Regression tests # #################### From 5aa3ff22d831f7998f82ce78403036f9fae65c5e Mon Sep 17 00:00:00 2001 From: Daniel Keller Date: Wed, 4 Jun 2025 10:56:05 +0200 Subject: [PATCH 25/67] Update idma_wrap.sv to double byte width for BE_WIDTH --- rtl/idma_wrap.sv | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/rtl/idma_wrap.sv b/rtl/idma_wrap.sv index bfe46b46..00a7d48c 100644 --- a/rtl/idma_wrap.sv +++ b/rtl/idma_wrap.sv @@ -246,7 +246,7 @@ module dmac_wrap #( periph_to_reg #( .AW (RegAddrWidth), .DW (DATA_WIDTH), - .BW (BE_WIDTH), + .BW (BE_WIDTH<<1), .IW (PE_ID_WIDTH), .req_t(dma_regs_req_t), 
.rsp_t(dma_regs_rsp_t) From 3b8c21b8a0f2b6cf0aadad7af1c20117097e0a99 Mon Sep 17 00:00:00 2001 From: Daniel Keller Date: Wed, 23 Jul 2025 21:44:34 +0200 Subject: [PATCH 26/67] [wip] parametrization of wide port --- packages/pulp_cluster_package.sv | 6 ++ rtl/idma_wrap.sv | 4 +- rtl/pulp_cluster.sv | 98 ++++++++++++++++++++++++++++---- 3 files changed, 94 insertions(+), 14 deletions(-) diff --git a/packages/pulp_cluster_package.sv b/packages/pulp_cluster_package.sv index f5bef1a0..439bf703 100644 --- a/packages/pulp_cluster_package.sv +++ b/packages/pulp_cluster_package.sv @@ -178,6 +178,11 @@ package pulp_cluster_package; doub_t ClusterExternalOffs; // Address remap for virtualization bit EnableRemapAddress; + // Enable wide AXI master port for high-bandwidth DMA transfers + // When disabled (0): Wide AXI ports are tied off, DMA uses narrow transfers only + // When enabled (1): DMA can use wide AXI port for high-bandwidth transfers + // Automatically disabled when using MCHAN (TARGET_MCHAN defined) + bit EnableWidePort; // Enable Snitch ICache bit SnitchICache; } pulp_cluster_cfg_t; @@ -276,6 +281,7 @@ package pulp_cluster_package; ClusterPeriphOffs: 'h00200000, ClusterExternalOffs: 'h00400000, EnableRemapAddress: 0, + EnableWidePort: 1, SnitchICache: 0, default: '0 }; diff --git a/rtl/idma_wrap.sv b/rtl/idma_wrap.sv index 00a7d48c..7c38fab1 100644 --- a/rtl/idma_wrap.sv +++ b/rtl/idma_wrap.sv @@ -39,7 +39,7 @@ module dmac_wrap #( parameter int unsigned GLOBAL_QUEUE_DEPTH = 2, // mux read ports between tcdm-tcdm and tcdm-axi? 
parameter bit MUX_READ = 1'b0, - parameter bit TCDM_MEM2BANKS = 1'b0, + parameter bit TCDM_MEM2BANKS = 1'b0, // when using mem2banks (implies AXI_DATA_WIDTH==64): // 4 ports per stream if read ports muxed, otherwise 6 // when not using mem2banks: @@ -177,7 +177,7 @@ module dmac_wrap #( axi_req_t [ NumStreams-1:0] dma_req; axi_resp_t [ NumStreams-1:0] dma_rsp; - // interface to structs + // interface to structs for (genvar s = 0; s < NUM_BIDIR_STREAMS; s++) begin : gen_connect_interface assign ext_master_req_o[s] = soc_req[s]; assign soc_rsp[s] = ext_master_resp_i[s]; diff --git a/rtl/pulp_cluster.sv b/rtl/pulp_cluster.sv index 6880ff93..3e7d1071 100644 --- a/rtl/pulp_cluster.sv +++ b/rtl/pulp_cluster.sv @@ -339,6 +339,22 @@ logic s_dma_cl_irq; logic s_dma_fc_event; logic s_dma_fc_irq; +// Determine if wide AXI port should be enabled based on DMA type and configuration +// - MCHAN: Always disable wide port (uses narrow port only) +// - iDMA: Use Cfg.EnableWidePort parameter +`ifdef TARGET_MCHAN + localparam bit WidePortShouldBeEnabled = 1'b0; // MCHAN never needs wide ports +`else + localparam bit WidePortShouldBeEnabled = Cfg.EnableWidePort; // User-configurable for iDMA +`endif + +// Wide AXI infrastructure: Conditional implementation based on EnableWidePort +// - MCHAN: Always uses narrow transfers (WidePortShouldBeEnabled = 0) +// - iDMA with EnableWidePort=1: Uses wide transfers (256-bit AXI) +// - iDMA with EnableWidePort=0: Uses narrow transfers (64-bit AXI) +// - Wide infrastructure present for interface compatibility +// - Narrow DMA master merged with cluster bus master when wide disabled + logic [Cfg.NumCores-1:0] hmr_barrier_matched; logic [Cfg.NumCores-1:0] hmr_dmr_sw_resynch_req, hmr_tmr_sw_resynch_req; logic [Cfg.NumCores-1:0] hmr_dmr_sw_synch_req, hmr_tmr_sw_synch_req; @@ -353,6 +369,7 @@ localparam DMA_IW_CONTRIB_FAC = Cfg.DmaUseHwpePort ? 0 : 1; // if using MCHAN, must be 32 localparam int unsigned DMA_HCI_DATA_WIDTH = Cfg.DmaUseHwpePort ? 
Cfg.AxiDataOutWideWidth : DataWidth; + localparam hci_package::hci_size_parameter_t HciCoreSizeParam = '{ DW: DataWidth, AW: AddrWidth, @@ -578,8 +595,11 @@ hci_core_intf #( c2s_in_int_req_t s_core_instr_bus_req; c2s_in_int_resp_t s_core_instr_bus_resp; - c2s_wide_req_t s_dma_master_req; + // DMA master signals - always declared, conditionally connected + c2s_wide_req_t s_dma_master_req; // Wide DMA master (256-bit) c2s_wide_resp_t s_dma_master_resp; + c2s_out_int_req_t s_dma_narrow_master_req; // Narrow DMA master (64-bit) + c2s_out_int_resp_t s_dma_narrow_master_resp; // core per2axi -> ext @@ -790,15 +810,15 @@ dmac_wrap #( .NB_CORES ( Cfg.NumCores ), .NB_OUTSND_BURSTS ( Cfg.DmaNumOutstandingBursts ), .AXI_ADDR_WIDTH ( Cfg.AxiAddrWidth ), - .AXI_DATA_WIDTH ( Cfg.AxiDataOutWideWidth ), - .AXI_ID_WIDTH ( Cfg.AxiIdOutWideWidth ), + .AXI_DATA_WIDTH ( WidePortShouldBeEnabled ? Cfg.AxiDataOutWideWidth : Cfg.AxiDataOutWidth ), + .AXI_ID_WIDTH ( WidePortShouldBeEnabled ? Cfg.AxiIdOutWideWidth : AxiIdOutWidth ), .AXI_USER_WIDTH ( Cfg.AxiUserWidth ), .PE_ID_WIDTH ( Cfg.NumCores + 1 ), .DATA_WIDTH ( DataWidth ), .ADDR_WIDTH ( AddrWidth ), .BE_WIDTH ( BeWidth ), - .axi_req_t ( c2s_wide_req_t ), - .axi_resp_t ( c2s_wide_resp_t ), + .axi_req_t ( WidePortShouldBeEnabled ? c2s_wide_req_t : c2s_out_int_req_t ), + .axi_resp_t ( WidePortShouldBeEnabled ? c2s_wide_resp_t : c2s_out_int_resp_t ), `ifdef TARGET_MCHAN .NB_CTRLS ( Cfg.NumCores + 2 ), .MCHAN_BURST_LENGTH ( Cfg.DmaBurstLength ), @@ -818,11 +838,11 @@ dmac_wrap #( .ctrl_slave ( s_core_dmactrl_bus ), .tcdm_master ( s_hci_dma ), `ifdef TARGET_MCHAN - .ext_master_req_o ( s_dma_master_req ), - .ext_master_resp_i ( s_dma_master_resp ), + .ext_master_req_o ( /* MCHAN uses narrow port - not connected to wide */ ), + .ext_master_resp_i ( '0 ), `else - .ext_master_req_o ( {s_dma_master_req} ), - .ext_master_resp_i ( {s_dma_master_resp} ), + .ext_master_req_o ( WidePortShouldBeEnabled ? 
{s_dma_master_req} : {s_dma_narrow_master_req} ), + .ext_master_resp_i ( WidePortShouldBeEnabled ? {s_dma_master_resp} : {s_dma_narrow_master_resp} ), `endif .term_event_o ( s_dma_event ), .term_irq_o ( s_dma_irq ), @@ -1664,8 +1684,57 @@ c2s_resp_t src_resp, isolate_src_resp; c2s_remap_req_t src_remap_req; c2s_remap_resp_t src_remap_resp; -`AXI_ASSIGN_REQ_STRUCT(src_remap_req,s_data_master_req) -`AXI_ASSIGN_RESP_STRUCT(s_data_master_resp,src_remap_resp) +// Connect DMA narrow master when wide port disabled, otherwise cluster bus master +if (WidePortShouldBeEnabled) begin : gen_cluster_bus_narrow_master + `AXI_ASSIGN_REQ_STRUCT(src_remap_req,s_data_master_req) + `AXI_ASSIGN_RESP_STRUCT(s_data_master_resp,src_remap_resp) +end else begin : gen_dma_narrow_master + // Merge cluster bus master and DMA narrow master + c2s_remap_req_t [1:0] narrow_master_reqs; + c2s_remap_resp_t [1:0] narrow_master_resps; + + `AXI_ASSIGN_REQ_STRUCT(narrow_master_reqs[0],s_data_master_req) // Cluster bus + `AXI_ASSIGN_REQ_STRUCT(narrow_master_reqs[1],s_dma_narrow_master_req) // DMA narrow + `AXI_ASSIGN_RESP_STRUCT(s_data_master_resp,narrow_master_resps[0]) + `AXI_ASSIGN_RESP_STRUCT(s_dma_narrow_master_resp,narrow_master_resps[1]) + + // Simple AXI crossbar to merge two narrow masters + axi_xbar #( + .Cfg ( '{ + NoSlvPorts: 2, + NoMstPorts: 1, + MaxMstTrans: 4, + MaxSlvTrans: 4, + FallThrough: 1'b0, + LatencyMode: axi_pkg::CUT_ALL_AX, + PipelineStages: 0, + AxiIdWidthSlvPorts: AxiIdOutWidth, + AxiIdUsedSlvPorts: AxiIdOutWidth, + UniqueIds: 1'b1, + AxiAddrWidth: Cfg.AxiAddrWidth, + AxiDataWidth: Cfg.AxiDataOutWidth, + NoAddrRules: 1 + } ), + .ATOPs ( 1'b1 ), + .Connectivity ( '1 ), + .slv_req_t ( c2s_remap_req_t ), + .slv_resp_t ( c2s_remap_resp_t ), + .mst_req_t ( c2s_remap_req_t ), + .mst_resp_t ( c2s_remap_resp_t ), + .rule_t ( axi_pkg::xbar_rule_32_t ) + ) i_narrow_master_xbar ( + .clk_i ( clk_i ), + .rst_ni ( rst_ni ), + .test_i ( test_mode_i ), + .slv_ports_req_i ( narrow_master_reqs 
), + .slv_ports_resp_o( narrow_master_resps ), + .mst_ports_req_o ( src_remap_req ), + .mst_ports_resp_i( src_remap_resp ), + .addr_map_i ( '{'{idx: 0, start_addr: '0, end_addr: '1}} ), + .en_default_mst_port_i( '1 ), + .default_mst_port_i ( '0 ) + ); +end if (Cfg.AxiIdOutWidth != AxiIdOutWidth) begin : gen_c2s_idwremap axi_id_remap #( @@ -1767,7 +1836,7 @@ axi_isolate #( .slv_resp_o ( isolate_src_wide_resp ), .mst_req_o ( src_wide_req ), .mst_resp_i ( src_wide_resp ), - .isolate_i ( axi_isolate_synch ), + .isolate_i ( axi_isolate_synch ), .isolated_o ( axi_isolated_wide_o ) ); @@ -1967,12 +2036,17 @@ initial begin : p_assert else $fatal(1, "When using MCHAN, Cfg.DmaNumPlugs must be 4!"); assert(!Cfg.DmaUseHwpePort) else $fatal(1, "When using MCHAN, Cfg.DmaUseHwpePort must be 0!"); + assert(!WidePortShouldBeEnabled) + else $fatal(1, "When using MCHAN, wide port should be disabled!"); `else if (!Cfg.DmaUseHwpePort) begin // The DMA can have wide access to TCDM only when sharing the master port to HCI with the HWPE assert(DMA_HCI_DATA_WIDTH == DataWidth) else $fatal(1, "When Cfg.DmaUseHwpePort is 0, DMA_HCI_DATA_WIDTH must be equal to DataWidth!"); end + // Note: iDMA now uses conditional data width and AXI path selection + // EnableWidePort=0: iDMA uses 64-bit narrow transfers via cluster bus AXI path + // EnableWidePort=1: iDMA uses 256-bit wide transfers via dedicated wide AXI path `endif end `endif From 44886914ebf15e822beaefd6d26b547480fd76ea Mon Sep 17 00:00:00 2001 From: Daniel Keller Date: Thu, 24 Jul 2025 14:35:03 +0200 Subject: [PATCH 27/67] Refactor dmac_wrap instantiation to support both wide and narrow port configurations based on WidePortShouldBeEnabled. Update AXI request/response handling and introduce a multiplexer for merging cluster bus and DMA narrow master requests. Enhance isolation and CDC instantiation for wide port scenarios. 
--- rtl/pulp_cluster.sv | 360 ++++++++++++++++++++++++++------------------ 1 file changed, 217 insertions(+), 143 deletions(-) diff --git a/rtl/pulp_cluster.sv b/rtl/pulp_cluster.sv index 3e7d1071..908d2341 100644 --- a/rtl/pulp_cluster.sv +++ b/rtl/pulp_cluster.sv @@ -806,50 +806,97 @@ cluster_interconnect_wrap #( //*************************************************** //*********************DMAC WRAP********************* //*************************************************** -dmac_wrap #( - .NB_CORES ( Cfg.NumCores ), - .NB_OUTSND_BURSTS ( Cfg.DmaNumOutstandingBursts ), - .AXI_ADDR_WIDTH ( Cfg.AxiAddrWidth ), - .AXI_DATA_WIDTH ( WidePortShouldBeEnabled ? Cfg.AxiDataOutWideWidth : Cfg.AxiDataOutWidth ), - .AXI_ID_WIDTH ( WidePortShouldBeEnabled ? Cfg.AxiIdOutWideWidth : AxiIdOutWidth ), - .AXI_USER_WIDTH ( Cfg.AxiUserWidth ), - .PE_ID_WIDTH ( Cfg.NumCores + 1 ), - .DATA_WIDTH ( DataWidth ), - .ADDR_WIDTH ( AddrWidth ), - .BE_WIDTH ( BeWidth ), - .axi_req_t ( WidePortShouldBeEnabled ? c2s_wide_req_t : c2s_out_int_req_t ), - .axi_resp_t ( WidePortShouldBeEnabled ? c2s_wide_resp_t : c2s_out_int_resp_t ), -`ifdef TARGET_MCHAN - .NB_CTRLS ( Cfg.NumCores + 2 ), - .MCHAN_BURST_LENGTH ( Cfg.DmaBurstLength ), - .TCDM_ADD_WIDTH ( TcdmAddrWidth ) -`else - .NB_PE_PORTS ( 2 ), - .NUM_BIDIR_STREAMS ( 1 ), - .GLOBAL_QUEUE_DEPTH ( 2 ), - .MUX_READ ( 1'b1 ), - .TCDM_MEM2BANKS ( !Cfg.DmaUseHwpePort ) -`endif -) dmac_wrap_i ( - .clk_i ( clk_i ), - .rst_ni ( rst_ni ), - .test_mode_i ( test_mode_i ), - .pe_ctrl_slave ( s_periph_dma_bus[1:0] ), - .ctrl_slave ( s_core_dmactrl_bus ), - .tcdm_master ( s_hci_dma ), -`ifdef TARGET_MCHAN - .ext_master_req_o ( /* MCHAN uses narrow port - not connected to wide */ ), - .ext_master_resp_i ( '0 ), -`else - .ext_master_req_o ( WidePortShouldBeEnabled ? {s_dma_master_req} : {s_dma_narrow_master_req} ), - .ext_master_resp_i ( WidePortShouldBeEnabled ? 
{s_dma_master_resp} : {s_dma_narrow_master_resp} ), -`endif - .term_event_o ( s_dma_event ), - .term_irq_o ( s_dma_irq ), - .term_event_pe_o ( {s_dma_fc_event, s_dma_cl_event} ), - .term_irq_pe_o ( {s_dma_fc_irq, s_dma_cl_irq} ), - .busy_o ( s_dmac_busy ) -); +if (WidePortShouldBeEnabled) begin : gen_wide_port_idma + dmac_wrap #( + .NB_CORES ( Cfg.NumCores ), + .NB_OUTSND_BURSTS ( Cfg.DmaNumOutstandingBursts ), + .AXI_ADDR_WIDTH ( Cfg.AxiAddrWidth ), + .AXI_DATA_WIDTH ( Cfg.AxiDataOutWideWidth ), + .AXI_ID_WIDTH ( Cfg.AxiIdOutWideWidth ), + .AXI_USER_WIDTH ( Cfg.AxiUserWidth ), + .PE_ID_WIDTH ( Cfg.NumCores + 1 ), + .DATA_WIDTH ( DataWidth ), + .ADDR_WIDTH ( AddrWidth ), + .BE_WIDTH ( BeWidth ), + .axi_req_t ( c2s_wide_req_t ), + .axi_resp_t ( c2s_wide_resp_t ), + `ifdef TARGET_MCHAN + .NB_CTRLS ( Cfg.NumCores + 2 ), + .MCHAN_BURST_LENGTH ( Cfg.DmaBurstLength ), + .TCDM_ADD_WIDTH ( TcdmAddrWidth ) + `else + .NB_PE_PORTS ( 2 ), + .NUM_BIDIR_STREAMS ( 1 ), + .GLOBAL_QUEUE_DEPTH ( 2 ), + .MUX_READ ( 1'b1 ), + .TCDM_MEM2BANKS ( !Cfg.DmaUseHwpePort ) + `endif + ) dmac_wrap_i ( + .clk_i ( clk_i ), + .rst_ni ( rst_ni ), + .test_mode_i ( test_mode_i ), + .pe_ctrl_slave ( s_periph_dma_bus[1:0] ), + .ctrl_slave ( s_core_dmactrl_bus ), + .tcdm_master ( s_hci_dma ), + `ifdef TARGET_MCHAN + .ext_master_req_o ( /* MCHAN uses narrow port - not connected to wide */ ), + .ext_master_resp_i ( '0 ), + `else + .ext_master_req_o ( {s_dma_narrow_master_req} ), + .ext_master_resp_i ( {s_dma_narrow_master_resp} ), + `endif + .term_event_o ( s_dma_event ), + .term_irq_o ( s_dma_irq ), + .term_event_pe_o ( {s_dma_fc_event, s_dma_cl_event} ), + .term_irq_pe_o ( {s_dma_fc_irq, s_dma_cl_irq} ), + .busy_o ( s_dmac_busy ) + ); +end else begin : gen_narrow_port_idma + dmac_wrap #( + .NB_CORES ( Cfg.NumCores ), + .NB_OUTSND_BURSTS ( Cfg.DmaNumOutstandingBursts ), + .AXI_ADDR_WIDTH ( Cfg.AxiAddrWidth ), + .AXI_DATA_WIDTH ( Cfg.AxiDataOutWidth ), + .AXI_ID_WIDTH ( AxiIdOutWidth ), + .AXI_USER_WIDTH 
( Cfg.AxiUserWidth ), + .PE_ID_WIDTH ( Cfg.NumCores + 1 ), + .DATA_WIDTH ( DataWidth ), + .ADDR_WIDTH ( AddrWidth ), + .BE_WIDTH ( BeWidth ), + .axi_req_t ( c2s_out_int_req_t ), + .axi_resp_t ( c2s_out_int_resp_t ), + `ifdef TARGET_MCHAN + .NB_CTRLS ( Cfg.NumCores + 2 ), + .MCHAN_BURST_LENGTH ( Cfg.DmaBurstLength ), + .TCDM_ADD_WIDTH ( TcdmAddrWidth ) + `else + .NB_PE_PORTS ( 2 ), + .NUM_BIDIR_STREAMS ( 1 ), + .GLOBAL_QUEUE_DEPTH ( 2 ), + .MUX_READ ( 1'b1 ), + .TCDM_MEM2BANKS ( !Cfg.DmaUseHwpePort ) + `endif + ) dmac_wrap_i ( + .clk_i ( clk_i ), + .rst_ni ( rst_ni ), + .test_mode_i ( test_mode_i ), + .pe_ctrl_slave ( s_periph_dma_bus[1:0] ), + .ctrl_slave ( s_core_dmactrl_bus ), + .tcdm_master ( s_hci_dma ), + `ifdef TARGET_MCHAN + .ext_master_req_o ( /* MCHAN uses narrow port - not connected to wide */ ), + .ext_master_resp_i ( '0 ), + `else + .ext_master_req_o ( {s_dma_narrow_master_req} ), + .ext_master_resp_i ( {s_dma_narrow_master_resp} ), + `endif + .term_event_o ( s_dma_event ), + .term_irq_o ( s_dma_irq ), + .term_event_pe_o ( {s_dma_fc_event, s_dma_cl_event} ), + .term_irq_pe_o ( {s_dma_fc_irq, s_dma_cl_irq} ), + .busy_o ( s_dmac_busy ) + ); +end //*************************************************** @@ -1686,54 +1733,76 @@ c2s_remap_resp_t src_remap_resp; // Connect DMA narrow master when wide port disabled, otherwise cluster bus master if (WidePortShouldBeEnabled) begin : gen_cluster_bus_narrow_master - `AXI_ASSIGN_REQ_STRUCT(src_remap_req,s_data_master_req) - `AXI_ASSIGN_RESP_STRUCT(s_data_master_resp,src_remap_resp) + `AXI_ASSIGN_REQ_STRUCT(src_remap_req, s_data_master_req) + `AXI_ASSIGN_RESP_STRUCT(s_data_master_resp, src_remap_resp) end else begin : gen_dma_narrow_master - // Merge cluster bus master and DMA narrow master - c2s_remap_req_t [1:0] narrow_master_reqs; + // Merge cluster bus master and DMA narrow master via AXI multiplexer + localparam int SlvIdWidth = AxiIdOutWidth; + localparam int MstIdWidth = AxiIdOutWidth + 1; + + // Widened AW 
channel for mux output + `AXI_TYPEDEF_AW_CHAN_T(c2s_mux_aw_chan_t, logic[Cfg.AxiAddrWidth-1:0], logic[MstIdWidth-1:0], logic[Cfg.AxiUserWidth-1:0]) + `AXI_TYPEDEF_W_CHAN_T(c2s_mux_w_chan_t, logic[Cfg.AxiDataOutWidth-1:0], logic[Cfg.AxiDataOutWidth/8-1:0], logic[Cfg.AxiUserWidth-1:0]) + `AXI_TYPEDEF_B_CHAN_T(c2s_mux_b_chan_t, logic[MstIdWidth-1:0], logic[Cfg.AxiUserWidth-1:0]) + `AXI_TYPEDEF_AR_CHAN_T(c2s_mux_ar_chan_t, logic[Cfg.AxiAddrWidth-1:0], logic[MstIdWidth-1:0], logic[Cfg.AxiUserWidth-1:0]) + `AXI_TYPEDEF_R_CHAN_T(c2s_mux_r_chan_t, logic[Cfg.AxiDataOutWidth-1:0], logic[MstIdWidth-1:0], logic[Cfg.AxiUserWidth-1:0]) + + `AXI_TYPEDEF_REQ_T(c2s_mux_req_t, c2s_mux_aw_chan_t, c2s_mux_w_chan_t, c2s_mux_ar_chan_t) + `AXI_TYPEDEF_RESP_T(c2s_mux_resp_t, c2s_mux_b_chan_t, c2s_mux_r_chan_t) + + // Arrays for the two slave ports + c2s_remap_req_t [1:0] narrow_master_reqs; c2s_remap_resp_t [1:0] narrow_master_resps; - - `AXI_ASSIGN_REQ_STRUCT(narrow_master_reqs[0],s_data_master_req) // Cluster bus - `AXI_ASSIGN_REQ_STRUCT(narrow_master_reqs[1],s_dma_narrow_master_req) // DMA narrow - `AXI_ASSIGN_RESP_STRUCT(s_data_master_resp,narrow_master_resps[0]) - `AXI_ASSIGN_RESP_STRUCT(s_dma_narrow_master_resp,narrow_master_resps[1]) - - // Simple AXI crossbar to merge two narrow masters - axi_xbar #( - .Cfg ( '{ - NoSlvPorts: 2, - NoMstPorts: 1, - MaxMstTrans: 4, - MaxSlvTrans: 4, - FallThrough: 1'b0, - LatencyMode: axi_pkg::CUT_ALL_AX, - PipelineStages: 0, - AxiIdWidthSlvPorts: AxiIdOutWidth, - AxiIdUsedSlvPorts: AxiIdOutWidth, - UniqueIds: 1'b1, - AxiAddrWidth: Cfg.AxiAddrWidth, - AxiDataWidth: Cfg.AxiDataOutWidth, - NoAddrRules: 1 - } ), - .ATOPs ( 1'b1 ), - .Connectivity ( '1 ), - .slv_req_t ( c2s_remap_req_t ), - .slv_resp_t ( c2s_remap_resp_t ), - .mst_req_t ( c2s_remap_req_t ), - .mst_resp_t ( c2s_remap_resp_t ), - .rule_t ( axi_pkg::xbar_rule_32_t ) - ) i_narrow_master_xbar ( - .clk_i ( clk_i ), - .rst_ni ( rst_ni ), - .test_i ( test_mode_i ), - .slv_ports_req_i ( 
narrow_master_reqs ), - .slv_ports_resp_o( narrow_master_resps ), - .mst_ports_req_o ( src_remap_req ), - .mst_ports_resp_i( src_remap_resp ), - .addr_map_i ( '{'{idx: 0, start_addr: '0, end_addr: '1}} ), - .en_default_mst_port_i( '1 ), - .default_mst_port_i ( '0 ) + c2s_mux_req_t mux_req; + c2s_mux_resp_t mux_resp; + + // Bind cluster-bus and DMA inputs + `AXI_ASSIGN_REQ_STRUCT(narrow_master_reqs[0], s_data_master_req) + `AXI_ASSIGN_REQ_STRUCT(narrow_master_reqs[1], s_dma_narrow_master_req) + + // 2-to-1 AXI multiplexer (prepending ID bit) + axi_mux #( + .SlvAxiIDWidth ( AxiIdOutWidth ), + .slv_aw_chan_t ( c2s_remap_aw_chan_t ), .mst_aw_chan_t ( c2s_mux_aw_chan_t ), + .w_chan_t ( c2s_remap_w_chan_t ), + .slv_b_chan_t ( c2s_remap_b_chan_t ), .mst_b_chan_t ( c2s_mux_b_chan_t ), + .slv_ar_chan_t ( c2s_remap_ar_chan_t ), .mst_ar_chan_t ( c2s_mux_ar_chan_t ), + .slv_r_chan_t ( c2s_remap_r_chan_t ), .mst_r_chan_t ( c2s_mux_r_chan_t ), + .slv_req_t ( c2s_remap_req_t ), .slv_resp_t ( c2s_remap_resp_t ), + .mst_req_t ( c2s_mux_req_t ), .mst_resp_t ( c2s_mux_resp_t ), + .NoSlvPorts ( 2 ), .FallThrough(1'b1) + ) i_idma_narrow_mux ( + .clk_i ( clk_i ), + .rst_ni ( rst_ni ), + .test_i ( test_mode_i ), + // Inputs: cluster-bus first, then DMA narrow + .slv_reqs_i ( narrow_master_reqs ), + .slv_resps_o ( narrow_master_resps ), + // Output of mux feeds ID shrink stage + .mst_req_o ( mux_req ), + .mst_resp_i ( mux_resp ) + ); + + axi_id_remap #( + .AxiSlvPortIdWidth ( MstIdWidth ), // ID width = AxiIdOutWidth + 1 + .AxiSlvPortMaxUniqIds ( 4 ), + .AxiMaxTxnsPerId ( Cfg.AxiMaxOutTrans ), + .AxiMstPortIdWidth ( AxiIdOutWidth ), + .slv_req_t ( c2s_mux_req_t ), + .slv_resp_t ( c2s_mux_resp_t ), + .mst_req_t ( c2s_remap_req_t ), + .mst_resp_t ( c2s_remap_resp_t ) + ) i_idma_narrow_id_shrink ( + .clk_i ( clk_i ), + .rst_ni ( rst_ni ), + .slv_req_i ( mux_req ), + .slv_resp_o ( mux_resp ), + .mst_req_o ( src_remap_req ), + .mst_resp_i ( src_remap_resp ) ); + // Drive external responses 
from narrow_master_resps + `AXI_ASSIGN_RESP_STRUCT(s_data_master_resp, narrow_master_resps[0]) + `AXI_ASSIGN_RESP_STRUCT(s_dma_narrow_master_resp, narrow_master_resps[1]) end if (Cfg.AxiIdOutWidth != AxiIdOutWidth) begin : gen_c2s_idwremap @@ -1816,62 +1885,67 @@ axi_cdc_src #( c2s_wide_req_t src_wide_req, isolate_src_wide_req; c2s_wide_resp_t src_wide_resp, isolate_src_wide_resp; -assign isolate_src_wide_req = s_dma_master_req; -assign s_dma_master_resp = isolate_src_wide_resp; +// Route DMA master request/response based on WidePortShouldBeEnabled +assign isolate_src_wide_req = WidePortShouldBeEnabled ? s_dma_master_req : s_dma_narrow_master_req; +assign s_dma_master_resp = WidePortShouldBeEnabled ? isolate_src_wide_resp : s_dma_narrow_master_resp; -axi_isolate #( - .NumPending ( 8 ), - .TerminateTransaction ( 1 ), - .AtopSupport ( 1 ), - .AxiAddrWidth ( Cfg.AxiAddrWidth ), - .AxiDataWidth ( Cfg.AxiDataOutWideWidth ), - .AxiIdWidth ( Cfg.AxiIdOutWideWidth ), - .AxiUserWidth ( Cfg.AxiUserWidth ), - .axi_req_t ( c2s_wide_req_t ), - .axi_resp_t ( c2s_wide_resp_t ) -) i_axi_wide_master_isolate ( - .clk_i ( clk_i ), - .rst_ni ( rst_ni ), - .slv_req_i ( isolate_src_wide_req ), - .slv_resp_o ( isolate_src_wide_resp ), - .mst_req_o ( src_wide_req ), - .mst_resp_i ( src_wide_resp ), - .isolate_i ( axi_isolate_synch ), - .isolated_o ( axi_isolated_wide_o ) -); - -axi_cdc_src #( - .aw_chan_t ( c2s_wide_aw_chan_t ), - .w_chan_t ( c2s_wide_w_chan_t ), - .b_chan_t ( c2s_wide_b_chan_t ), - .r_chan_t ( c2s_wide_r_chan_t ), - .ar_chan_t ( c2s_wide_ar_chan_t ), - .axi_req_t ( c2s_wide_req_t ), - .axi_resp_t ( c2s_wide_resp_t ), - .LogDepth ( Cfg.AxiCdcLogDepth ), - .SyncStages ( Cfg.AxiCdcSyncStages ) -) axi_wide_master_cdc_i ( - .src_rst_ni ( pwr_on_rst_ni ), - .src_clk_i ( clk_i ), - .src_req_i ( src_wide_req ), - .src_resp_o ( src_wide_resp ), - .async_data_master_aw_wptr_o ( async_wide_master_aw_wptr_o ), - .async_data_master_aw_rptr_i ( async_wide_master_aw_rptr_i ), - 
.async_data_master_aw_data_o ( async_wide_master_aw_data_o ), - .async_data_master_w_wptr_o ( async_wide_master_w_wptr_o ), - .async_data_master_w_rptr_i ( async_wide_master_w_rptr_i ), - .async_data_master_w_data_o ( async_wide_master_w_data_o ), - .async_data_master_ar_wptr_o ( async_wide_master_ar_wptr_o ), - .async_data_master_ar_rptr_i ( async_wide_master_ar_rptr_i ), - .async_data_master_ar_data_o ( async_wide_master_ar_data_o ), - .async_data_master_b_wptr_i ( async_wide_master_b_wptr_i ), - .async_data_master_b_rptr_o ( async_wide_master_b_rptr_o ), - .async_data_master_b_data_i ( async_wide_master_b_data_i ), - .async_data_master_r_wptr_i ( async_wide_master_r_wptr_i ), - .async_data_master_r_rptr_o ( async_wide_master_r_rptr_o ), - .async_data_master_r_data_i ( async_wide_master_r_data_i ) -); +// Instantiate wide port isolation and CDC only when enabled +generate + if (WidePortShouldBeEnabled) begin : gen_wide_port + axi_isolate #( + .NumPending ( 8 ), + .TerminateTransaction ( 1 ), + .AtopSupport ( 1 ), + .AxiAddrWidth ( Cfg.AxiAddrWidth ), + .AxiDataWidth ( Cfg.AxiDataOutWideWidth ), + .AxiIdWidth ( Cfg.AxiIdOutWideWidth ), + .AxiUserWidth ( Cfg.AxiUserWidth ), + .axi_req_t ( c2s_wide_req_t ), + .axi_resp_t ( c2s_wide_resp_t ) + ) i_axi_wide_master_isolate ( + .clk_i ( clk_i ), + .rst_ni ( rst_ni ), + .slv_req_i ( isolate_src_wide_req ), + .slv_resp_o ( isolate_src_wide_resp ), + .mst_req_o ( src_wide_req ), + .mst_resp_i ( src_wide_resp ), + .isolate_i ( axi_isolate_synch ), + .isolated_o ( axi_isolated_wide_o ) + ); + axi_cdc_src #( + .aw_chan_t ( c2s_wide_aw_chan_t ), + .w_chan_t ( c2s_wide_w_chan_t ), + .b_chan_t ( c2s_wide_b_chan_t ), + .r_chan_t ( c2s_wide_r_chan_t ), + .ar_chan_t ( c2s_wide_ar_chan_t ), + .axi_req_t ( c2s_wide_req_t ), + .axi_resp_t ( c2s_wide_resp_t ), + .LogDepth ( Cfg.AxiCdcLogDepth ), + .SyncStages ( Cfg.AxiCdcSyncStages ) + ) axi_wide_master_cdc_i ( + .src_rst_ni ( pwr_on_rst_ni ), + .src_clk_i ( clk_i ), + .src_req_i ( 
src_wide_req ), + .src_resp_o ( src_wide_resp ), + .async_data_master_aw_wptr_o ( async_wide_master_aw_wptr_o ), + .async_data_master_aw_rptr_i ( async_wide_master_aw_rptr_i ), + .async_data_master_aw_data_o ( async_wide_master_aw_data_o ), + .async_data_master_w_wptr_o ( async_wide_master_w_wptr_o ), + .async_data_master_w_rptr_i ( async_wide_master_w_rptr_i ), + .async_data_master_w_data_o ( async_wide_master_w_data_o ), + .async_data_master_ar_wptr_o ( async_wide_master_ar_wptr_o ), + .async_data_master_ar_rptr_i ( async_wide_master_ar_rptr_i ), + .async_data_master_ar_data_o ( async_wide_master_ar_data_o ), + .async_data_master_b_wptr_i ( async_wide_master_b_wptr_i ), + .async_data_master_b_rptr_o ( async_wide_master_b_rptr_o ), + .async_data_master_b_data_i ( async_wide_master_b_data_i ), + .async_data_master_r_wptr_i ( async_wide_master_r_wptr_i ), + .async_data_master_r_rptr_o ( async_wide_master_r_rptr_o ), + .async_data_master_r_data_i ( async_wide_master_r_data_i ) + ); + end +endgenerate // SOC TO CLUSTER `AXI_TYPEDEF_AW_CHAN_T(s2c_aw_chan_t,logic[Cfg.AxiAddrWidth-1:0],logic[Cfg.AxiIdInWidth-1:0],logic[Cfg.AxiUserWidth-1:0]) From 0b85452e1980fbc220bcaff0ce0e9846a32ad63d Mon Sep 17 00:00:00 2001 From: Daniel Keller Date: Thu, 24 Jul 2025 18:34:45 +0200 Subject: [PATCH 28/67] bumpb runtime version --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index b5e8e7a6..dea09345 100644 --- a/Makefile +++ b/Makefile @@ -76,7 +76,7 @@ sw-clean: ## Clone pulp-runtime as SW stack PULP_RUNTIME_REMOTE ?= https://github.com/pulp-platform/pulp-runtime.git -PULP_RUNTIME_COMMIT ?= 8000372999b5933317c357367b9d2f445a3cbfd0 # branch: dkeller/chimera-v2 +PULP_RUNTIME_COMMIT ?= af75d00 # branch: dkeller/chimera-v2 pulp-runtime: git clone $(PULP_RUNTIME_REMOTE) $@ From 5e04d322583b38800fc3525c7db85f173ae3b230 Mon Sep 17 00:00:00 2001 From: Daniel Keller Date: Thu, 24 Jul 2025 19:19:38 +0200 Subject: [PATCH 29/67] [wip] fixing CI 
--- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index dea09345..7683bc6f 100644 --- a/Makefile +++ b/Makefile @@ -38,7 +38,7 @@ endef ###################### NONFREE_REMOTE ?= git@iis-git.ee.ethz.ch:pulp-restricted/pulp-cluster-nonfree.git -NONFREE_COMMIT ?= f492530603be007bdc82c37bc0ebae6599d7c7a2 # branch: smazzola/chimera +NONFREE_COMMIT ?= 6866f0ce # branch: smazzola/chimera nonfree-init: git clone $(NONFREE_REMOTE) nonfree From 845a3ba1b506d63b57bb06e354611f695cfae62f Mon Sep 17 00:00:00 2001 From: Daniel Keller Date: Thu, 24 Jul 2025 21:54:00 +0200 Subject: [PATCH 30/67] [wip] bump versions --- Makefile | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/Makefile b/Makefile index 7683bc6f..130d5ef7 100644 --- a/Makefile +++ b/Makefile @@ -38,7 +38,7 @@ endef ###################### NONFREE_REMOTE ?= git@iis-git.ee.ethz.ch:pulp-restricted/pulp-cluster-nonfree.git -NONFREE_COMMIT ?= 6866f0ce # branch: smazzola/chimera +NONFREE_COMMIT ?= 87a36fdfc234dda5c753f85f244ef21c94633d82 # branch: dkeller/chimera-v2 nonfree-init: git clone $(NONFREE_REMOTE) nonfree @@ -76,7 +76,7 @@ sw-clean: ## Clone pulp-runtime as SW stack PULP_RUNTIME_REMOTE ?= https://github.com/pulp-platform/pulp-runtime.git -PULP_RUNTIME_COMMIT ?= af75d00 # branch: dkeller/chimera-v2 +PULP_RUNTIME_COMMIT ?= 118d86c2fadc130ffb3b362f6e2875ca88acb30c # branch: dkeller/chimera-v2 pulp-runtime: git clone $(PULP_RUNTIME_REMOTE) $@ @@ -92,7 +92,7 @@ fault_injection_sim: ## Clone regression tests REGRESSION_TESTS_REMOTE ?= https://github.com/pulp-platform/regression_tests.git -REGRESSION_TESTS_COMMIT ?= 9ead1c0f6197e79d731749789f381ad83c7d81a2 # branch: dkeller/chimera-v2 +REGRESSION_TESTS_COMMIT ?= 968b2d3e2259a489347017ef99910257fa247970 # branch: dkeller/chimera-v2 regression_tests: git clone $(REGRESSION_TESTS_REMOTE) $@ From ae44fee2e33d30f16456995653f796ff15eece37 Mon Sep 17 00:00:00 2001 From: Daniel Keller Date: Thu, 24 Jul 2025 
22:06:21 +0200 Subject: [PATCH 31/67] [wip] bump version --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 130d5ef7..c9c00d72 100644 --- a/Makefile +++ b/Makefile @@ -38,7 +38,7 @@ endef ###################### NONFREE_REMOTE ?= git@iis-git.ee.ethz.ch:pulp-restricted/pulp-cluster-nonfree.git -NONFREE_COMMIT ?= 87a36fdfc234dda5c753f85f244ef21c94633d82 # branch: dkeller/chimera-v2 +NONFREE_COMMIT ?= 6f5b4b5aa85b6f3ac4bbe03439dd250ab4810d80 # branch: dkeller/chimera-v2 nonfree-init: git clone $(NONFREE_REMOTE) nonfree From 6cb17430e6fcc6107b19f2014790bcc8fe8aacb5 Mon Sep 17 00:00:00 2001 From: Daniel Keller Date: Thu, 24 Jul 2025 22:25:23 +0200 Subject: [PATCH 32/67] track tip of the branch for nonfree repo, added idma tests in nonfree --- Makefile | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/Makefile b/Makefile index c9c00d72..c1fd2776 100644 --- a/Makefile +++ b/Makefile @@ -38,10 +38,11 @@ endef ###################### NONFREE_REMOTE ?= git@iis-git.ee.ethz.ch:pulp-restricted/pulp-cluster-nonfree.git -NONFREE_COMMIT ?= 6f5b4b5aa85b6f3ac4bbe03439dd250ab4810d80 # branch: dkeller/chimera-v2 +#NONFREE_COMMIT ?= 6f5b4b5aa85b6f3ac4bbe03439dd250ab4810d80 # branch: dkeller/chimera-v2 +NONFREE_BRANCH ?= dkeller/chimera-v2 nonfree-init: - git clone $(NONFREE_REMOTE) nonfree + git clone --single-branch --branch $(NONFREE_BRANCH) $(NONFREE_REMOTE) nonfree cd nonfree && git checkout $(NONFREE_COMMIT) ################ From 039428b4d4793cef786f43e8504c0cc1e64bd23e Mon Sep 17 00:00:00 2001 From: Daniel Keller Date: Thu, 7 Aug 2025 10:33:30 +0200 Subject: [PATCH 33/67] fix narrow / wide master mismatch Signed-off-by: Daniel Keller --- rtl/pulp_cluster.sv | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/rtl/pulp_cluster.sv b/rtl/pulp_cluster.sv index 908d2341..162d7886 100644 --- a/rtl/pulp_cluster.sv +++ b/rtl/pulp_cluster.sv @@ -842,8 +842,8 @@ if (WidePortShouldBeEnabled) begin : 
gen_wide_port_idma .ext_master_req_o ( /* MCHAN uses narrow port - not connected to wide */ ), .ext_master_resp_i ( '0 ), `else - .ext_master_req_o ( {s_dma_narrow_master_req} ), - .ext_master_resp_i ( {s_dma_narrow_master_resp} ), + .ext_master_req_o ( {s_dma_master_req} ), + .ext_master_resp_i ( {s_dma_master_resp} ), `endif .term_event_o ( s_dma_event ), .term_irq_o ( s_dma_irq ), From 1cb29e7fc60fc76cc701b7010ca621b6c26ade25 Mon Sep 17 00:00:00 2001 From: Daniel Keller Date: Thu, 7 Aug 2025 16:39:13 +0200 Subject: [PATCH 34/67] [wip] update TB to parametrize narrow vs wide port Signed-off-by: Daniel Keller --- tb/pulp_cluster_tb.sv | 565 +++++++++++++++++++++++++----------------- 1 file changed, 341 insertions(+), 224 deletions(-) diff --git a/tb/pulp_cluster_tb.sv b/tb/pulp_cluster_tb.sv index b2d7e80f..62727a28 100644 --- a/tb/pulp_cluster_tb.sv +++ b/tb/pulp_cluster_tb.sv @@ -24,7 +24,13 @@ import "DPI-C" function read_elf(input string filename); import "DPI-C" function byte get_section(output longint address, output longint len); import "DPI-C" context function byte read_section(input longint address, inout byte buffer[], input longint len); +// PULP Cluster flavors +`ifndef TB_ENABLE_WIDE_PORT + `define TB_ENABLE_WIDE_PORT 1 +`endif + module pulp_cluster_tb; + localparam bit EnableWidePort = `TB_ENABLE_WIDE_PORT; import pulp_cluster_package::*; import uvm_pkg::*; @@ -56,8 +62,8 @@ module pulp_cluster_tb; localparam AxiWideByteOffset = $clog2(AxiWideBeWidth); localparam AxiUw = 10; - localparam DmaAxiDw = 256; - localparam DmaAxiIw = 1; + localparam DmaAxiDw = EnableWidePort ? 256 : 64; + localparam DmaAxiIw = EnableWidePort ? 
1 : AxiIw; localparam bit[AxiAw-1:0] ClustBase = 'h10000000; localparam bit[AxiAw-1:0] ClustPeriphOffs = 'h00200000; @@ -136,19 +142,340 @@ module pulp_cluster_tb; .AXI_USER_WIDTH( AxiUw ) ) axi_slave[NMst-1:0](); - AXI_BUS #( - .AXI_ADDR_WIDTH( AxiAw ), - .AXI_DATA_WIDTH( DmaAxiDw ), - .AXI_ID_WIDTH ( DmaAxiIw ), - .AXI_USER_WIDTH( AxiUw ) - ) dma_slave(); + localparam pulp_cluster_cfg_t PulpClusterCfg = '{ + CoreType: pulp_cluster_package::RI5CY, + NumCores: `NB_CORES, + DmaNumPlugs: `NB_DMAS, + DmaNumOutstandingBursts: 8, + DmaBurstLength: 256, + DmaUseHwpePort: 1, + NumMstPeriphs: `NB_MPERIPHS, + NumSlvPeriphs: `NB_SPERIPHS, + ClusterAlias: 1, + ClusterAliasBase: 'h0, + NumSyncStages: 3, + UseHci: 1, + TcdmSize: 128*1024, + TcdmNumBank: 16, + HwpePresent: 1, + HwpeCfg: '{NumHwpes: 1, HwpeList: {NEUREKA}}, + HwpeNumPorts: 9, + HMRPresent: 1, + HMRDmrEnabled: 1, + HMRTmrEnabled: 1, + HMRDmrFIxed: 0, + HMRTmrFIxed: 0, + HMRInterleaveGrps: 1, + HMREnableRapidRecovery: 1, + HMRSeparateDataVoters: 1, + HMRSeparateAxiBus: 0, + HMRNumBusVoters: 1, + EnableECC: 0, + ECCInterco: 0, + iCacheNumBanks: 2, + iCacheNumLines: 1, + iCacheNumWays: 4, + iCacheSharedSize: 4*1024, + iCachePrivateSize: 512, + iCachePrivateDataWidth: 32, + EnableReducedTag: 1, + L2Size: 1000*1024, + DmBaseAddr: 'h60203000, + BootRomBaseAddr: BootAddr, + BootAddr: BootAddr, + EnablePrivateFpu: 1, + EnablePrivateFpDivSqrt: 0, + NumAxiIn: NumAxiSubordinatePorts, + NumAxiOut: NumAxiManagerPorts, + AxiIdInWidth: AxiIw-2, + AxiIdOutWidth: AxiIw, + AxiIdOutWideWidth: 1, + AxiAddrWidth: AxiAw, + AxiDataInWidth: AxiDw, + AxiDataOutWidth: AxiDw, + AxiDataOutWideWidth: DmaAxiDw, + AxiUserWidth: AxiUw, + AxiMaxInTrans: 64, + AxiMaxOutTrans: 64, + AxiCdcLogDepth: 3, + AxiCdcSyncStages: 3, + SyncStages: 3, + ClusterBaseAddr: ClustBaseAddr, + ClusterPeriphOffs: ClustPeriphOffs, + ClusterExternalOffs: ClustExtOffs, + EnableRemapAddress: 0, + EnableWidePort: EnableWidePort, + SnitchICache: 0, + default: '0 + }; - 
AXI_BUS #( - .AXI_ADDR_WIDTH( AxiAw ), - .AXI_DATA_WIDTH( DmaAxiDw ), - .AXI_ID_WIDTH ( AxiIw ), - .AXI_USER_WIDTH( AxiUw ) - ) dma_slave_iw(); + generate + if (EnableWidePort) begin : gen_dma_buses + AXI_BUS #( + .AXI_ADDR_WIDTH( AxiAw ), + .AXI_DATA_WIDTH( DmaAxiDw ), + .AXI_ID_WIDTH ( DmaAxiIw ), + .AXI_USER_WIDTH( AxiUw ) + ) dma_slave(); + + AXI_BUS #( + .AXI_ADDR_WIDTH( AxiAw ), + .AXI_DATA_WIDTH( DmaAxiDw ), + .AXI_ID_WIDTH ( AxiIw ), + .AXI_USER_WIDTH( AxiUw ) + ) dma_slave_iw(); + + AXI_BUS_ASYNC_GRAY #( + .AXI_ADDR_WIDTH ( AxiAw ), + .AXI_DATA_WIDTH ( DmaAxiDw ), + .AXI_ID_WIDTH ( DmaAxiIw ), + .AXI_USER_WIDTH ( AxiUw ), + .LOG_DEPTH ( 3 ) + ) async_dma_axi_bus(); + + axi_dw_converter_intf #( + .AXI_ID_WIDTH ( AxiIw ), + .AXI_ADDR_WIDTH ( AxiAw ), + .AXI_SLV_PORT_DATA_WIDTH ( DmaAxiDw ), + .AXI_MST_PORT_DATA_WIDTH ( AxiDw ), + .AXI_USER_WIDTH ( AxiUw ), + .AXI_MAX_READS ( 3 ) + ) i_dma_dw_conv ( + .clk_i ( s_clk ), + .rst_ni ( s_rstn ), + .slv ( dma_slave_iw ), + .mst ( axi_slave[2] ) + ); + + axi_iw_converter_intf #( + .AXI_SLV_PORT_ID_WIDTH ( DmaAxiIw ), + .AXI_MST_PORT_ID_WIDTH ( AxiIw ), + .AXI_SLV_PORT_MAX_UNIQ_IDS ( 5 ), + .AXI_SLV_PORT_MAX_TXNS_PER_ID ( 5 ), + .AXI_SLV_PORT_MAX_TXNS ( 5 ), + .AXI_MST_PORT_MAX_UNIQ_IDS ( 5 ), + .AXI_MST_PORT_MAX_TXNS_PER_ID ( 5 ), + .AXI_ADDR_WIDTH ( AxiAw ), + .AXI_DATA_WIDTH ( DmaAxiDw ), + .AXI_USER_WIDTH ( AxiUw ) + ) i_dma_iw_conv ( + .clk_i ( s_clk ), + .rst_ni ( s_rstn ), + .slv ( dma_slave ), + .mst ( dma_slave_iw ) + ); + + axi_cdc_dst_intf #( + .AXI_ADDR_WIDTH ( AxiAw ), + .AXI_DATA_WIDTH ( DmaAxiDw ), + .AXI_ID_WIDTH ( DmaAxiIw ), + .AXI_USER_WIDTH ( AxiUw ), + .LOG_DEPTH ( 3 ) + ) cluster_to_soc_dma_dst_cdc_fifo_i ( + .dst_clk_i ( s_clk ), + .dst_rst_ni ( s_rstn ), + .src ( async_dma_axi_bus ), + .dst ( dma_slave ) + ); + + pulp_cluster + `ifdef USE_PULP_PARAMETERS + #( .Cfg ( PulpClusterCfg ) ) + `endif + cluster_i ( + .clk_i ( s_clk ), + .rst_ni ( s_rstn ), + .pwr_on_rst_ni ( s_rstn ), + .ref_clk_i ( 
s_clk ), + .axi_isolate_i ( '0 ), + .axi_isolated_o ( ), + .axi_isolated_wide_o ( ), + + .pmu_mem_pwdn_i ( 1'b0 ), + + .base_addr_i ( ClustBase[31:28] ), + + .dma_pe_evt_ack_i ( '1 ), + .dma_pe_evt_valid_o ( ), + + .dma_pe_irq_ack_i ( 1'b1 ), + .dma_pe_irq_valid_o ( ), + + .dbg_irq_valid_i ( '0 ), + .mbox_irq_i ( '0 ), + + .pf_evt_ack_i ( 1'b1 ), + .pf_evt_valid_o ( ), + + .async_cluster_events_wptr_i ( '0 ), + .async_cluster_events_rptr_o ( ), + .async_cluster_events_data_i ( '0 ), + + .en_sa_boot_i ( s_cluster_en_sa_boot ), + .test_mode_i ( 1'b0 ), + .fetch_en_i ( s_cluster_fetch_en ), + .eoc_o ( s_cluster_eoc ), + .busy_o ( s_cluster_busy ), + .cluster_id_i ( ClustIdx ), + + .async_data_master_aw_wptr_o ( async_cluster_to_soc_axi_bus.aw_wptr ), + .async_data_master_aw_rptr_i ( async_cluster_to_soc_axi_bus.aw_rptr ), + .async_data_master_aw_data_o ( async_cluster_to_soc_axi_bus.aw_data ), + .async_data_master_ar_wptr_o ( async_cluster_to_soc_axi_bus.ar_wptr ), + .async_data_master_ar_rptr_i ( async_cluster_to_soc_axi_bus.ar_rptr ), + .async_data_master_ar_data_o ( async_cluster_to_soc_axi_bus.ar_data ), + .async_data_master_w_data_o ( async_cluster_to_soc_axi_bus.w_data ), + .async_data_master_w_wptr_o ( async_cluster_to_soc_axi_bus.w_wptr ), + .async_data_master_w_rptr_i ( async_cluster_to_soc_axi_bus.w_rptr ), + .async_data_master_r_wptr_i ( async_cluster_to_soc_axi_bus.r_wptr ), + .async_data_master_r_rptr_o ( async_cluster_to_soc_axi_bus.r_rptr ), + .async_data_master_r_data_i ( async_cluster_to_soc_axi_bus.r_data ), + .async_data_master_b_wptr_i ( async_cluster_to_soc_axi_bus.b_wptr ), + .async_data_master_b_rptr_o ( async_cluster_to_soc_axi_bus.b_rptr ), + .async_data_master_b_data_i ( async_cluster_to_soc_axi_bus.b_data ), + + .async_wide_master_aw_wptr_o ( async_dma_axi_bus.aw_wptr ), + .async_wide_master_aw_rptr_i ( async_dma_axi_bus.aw_rptr ), + .async_wide_master_aw_data_o ( async_dma_axi_bus.aw_data ), + .async_wide_master_ar_wptr_o ( 
async_dma_axi_bus.ar_wptr ), + .async_wide_master_ar_rptr_i ( async_dma_axi_bus.ar_rptr ), + .async_wide_master_ar_data_o ( async_dma_axi_bus.ar_data ), + .async_wide_master_w_data_o ( async_dma_axi_bus.w_data ), + .async_wide_master_w_wptr_o ( async_dma_axi_bus.w_wptr ), + .async_wide_master_w_rptr_i ( async_dma_axi_bus.w_rptr ), + .async_wide_master_r_wptr_i ( async_dma_axi_bus.r_wptr ), + .async_wide_master_r_rptr_o ( async_dma_axi_bus.r_rptr ), + .async_wide_master_r_data_i ( async_dma_axi_bus.r_data ), + .async_wide_master_b_wptr_i ( async_dma_axi_bus.b_wptr ), + .async_wide_master_b_rptr_o ( async_dma_axi_bus.b_rptr ), + .async_wide_master_b_data_i ( async_dma_axi_bus.b_data ), + + .async_data_slave_aw_wptr_i ( async_soc_to_cluster_axi_bus.aw_wptr ), + .async_data_slave_aw_rptr_o ( async_soc_to_cluster_axi_bus.aw_rptr ), + .async_data_slave_aw_data_i ( async_soc_to_cluster_axi_bus.aw_data ), + .async_data_slave_ar_wptr_i ( async_soc_to_cluster_axi_bus.ar_wptr ), + .async_data_slave_ar_rptr_o ( async_soc_to_cluster_axi_bus.ar_rptr ), + .async_data_slave_ar_data_i ( async_soc_to_cluster_axi_bus.ar_data ), + .async_data_slave_w_data_i ( async_soc_to_cluster_axi_bus.w_data ), + .async_data_slave_w_wptr_i ( async_soc_to_cluster_axi_bus.w_wptr ), + .async_data_slave_w_rptr_o ( async_soc_to_cluster_axi_bus.w_rptr ), + .async_data_slave_r_wptr_o ( async_soc_to_cluster_axi_bus.r_wptr ), + .async_data_slave_r_rptr_i ( async_soc_to_cluster_axi_bus.r_rptr ), + .async_data_slave_r_data_o ( async_soc_to_cluster_axi_bus.r_data ), + .async_data_slave_b_wptr_o ( async_soc_to_cluster_axi_bus.b_wptr ), + .async_data_slave_b_rptr_i ( async_soc_to_cluster_axi_bus.b_rptr ), + .async_data_slave_b_data_o ( async_soc_to_cluster_axi_bus.b_data ) + ); + + end else begin : gen_dma_stubs + // Dummy wires with correct width for cluster port connections + logic async_dma_axi_bus_aw_wptr; + logic async_dma_axi_bus_aw_rptr; + logic async_dma_axi_bus_aw_data; + logic 
async_dma_axi_bus_ar_wptr; + logic async_dma_axi_bus_ar_rptr; + logic async_dma_axi_bus_ar_data; + logic async_dma_axi_bus_w_wptr; + logic async_dma_axi_bus_w_rptr; + logic async_dma_axi_bus_w_data; + logic async_dma_axi_bus_r_wptr; + logic async_dma_axi_bus_r_rptr; + logic async_dma_axi_bus_r_data; + logic async_dma_axi_bus_b_wptr; + logic async_dma_axi_bus_b_rptr; + logic async_dma_axi_bus_b_data; + + pulp_cluster + `ifdef USE_PULP_PARAMETERS + #( .Cfg ( PulpClusterCfg ) ) + `endif + cluster_i ( + .clk_i ( s_clk ), + .rst_ni ( s_rstn ), + .pwr_on_rst_ni ( s_rstn ), + .ref_clk_i ( s_clk ), + .axi_isolate_i ( '0 ), + .axi_isolated_o ( ), + .axi_isolated_wide_o ( ), + + .pmu_mem_pwdn_i ( 1'b0 ), + + .base_addr_i ( ClustBase[31:28] ), + + .dma_pe_evt_ack_i ( '1 ), + .dma_pe_evt_valid_o ( ), + + .dma_pe_irq_ack_i ( 1'b1 ), + .dma_pe_irq_valid_o ( ), + + .dbg_irq_valid_i ( '0 ), + .mbox_irq_i ( '0 ), + + .pf_evt_ack_i ( 1'b1 ), + .pf_evt_valid_o ( ), + + .async_cluster_events_wptr_i ( '0 ), + .async_cluster_events_rptr_o ( ), + .async_cluster_events_data_i ( '0 ), + + .en_sa_boot_i ( s_cluster_en_sa_boot ), + .test_mode_i ( 1'b0 ), + .fetch_en_i ( s_cluster_fetch_en ), + .eoc_o ( s_cluster_eoc ), + .busy_o ( s_cluster_busy ), + .cluster_id_i ( ClustIdx ), + + .async_data_master_aw_wptr_o ( async_cluster_to_soc_axi_bus.aw_wptr ), + .async_data_master_aw_rptr_i ( async_cluster_to_soc_axi_bus.aw_rptr ), + .async_data_master_aw_data_o ( async_cluster_to_soc_axi_bus.aw_data ), + .async_data_master_ar_wptr_o ( async_cluster_to_soc_axi_bus.ar_wptr ), + .async_data_master_ar_rptr_i ( async_cluster_to_soc_axi_bus.ar_rptr ), + .async_data_master_ar_data_o ( async_cluster_to_soc_axi_bus.ar_data ), + .async_data_master_w_data_o ( async_cluster_to_soc_axi_bus.w_data ), + .async_data_master_w_wptr_o ( async_cluster_to_soc_axi_bus.w_wptr ), + .async_data_master_w_rptr_i ( async_cluster_to_soc_axi_bus.w_rptr ), + .async_data_master_r_wptr_i ( async_cluster_to_soc_axi_bus.r_wptr ), + 
.async_data_master_r_rptr_o ( async_cluster_to_soc_axi_bus.r_rptr ), + .async_data_master_r_data_i ( async_cluster_to_soc_axi_bus.r_data ), + .async_data_master_b_wptr_i ( async_cluster_to_soc_axi_bus.b_wptr ), + .async_data_master_b_rptr_o ( async_cluster_to_soc_axi_bus.b_rptr ), + .async_data_master_b_data_i ( async_cluster_to_soc_axi_bus.b_data ), + + // Wide master ports tied off when wide port disabled + .async_wide_master_aw_wptr_o ( ), + .async_wide_master_aw_rptr_i ( '0 ), + .async_wide_master_aw_data_o ( ), + .async_wide_master_ar_wptr_o ( ), + .async_wide_master_ar_rptr_i ( '0 ), + .async_wide_master_ar_data_o ( ), + .async_wide_master_w_data_o ( ), + .async_wide_master_w_wptr_o ( ), + .async_wide_master_w_rptr_i ( '0 ), + .async_wide_master_r_wptr_i ( '0 ), + .async_wide_master_r_rptr_o ( ), + .async_wide_master_r_data_i ( 'x ), + .async_wide_master_b_wptr_i ( '0 ), + .async_wide_master_b_rptr_o ( ), + .async_wide_master_b_data_i ( 'x ), + + .async_data_slave_aw_wptr_i ( async_soc_to_cluster_axi_bus.aw_wptr ), + .async_data_slave_aw_rptr_o ( async_soc_to_cluster_axi_bus.aw_rptr ), + .async_data_slave_aw_data_i ( async_soc_to_cluster_axi_bus.aw_data ), + .async_data_slave_ar_wptr_i ( async_soc_to_cluster_axi_bus.ar_wptr ), + .async_data_slave_ar_rptr_o ( async_soc_to_cluster_axi_bus.ar_rptr ), + .async_data_slave_ar_data_i ( async_soc_to_cluster_axi_bus.ar_data ), + .async_data_slave_w_data_i ( async_soc_to_cluster_axi_bus.w_data ), + .async_data_slave_w_wptr_i ( async_soc_to_cluster_axi_bus.w_wptr ), + .async_data_slave_w_rptr_o ( async_soc_to_cluster_axi_bus.w_rptr ), + .async_data_slave_r_wptr_o ( async_soc_to_cluster_axi_bus.r_wptr ), + .async_data_slave_r_rptr_i ( async_soc_to_cluster_axi_bus.r_rptr ), + .async_data_slave_r_data_o ( async_soc_to_cluster_axi_bus.r_data ), + .async_data_slave_b_wptr_o ( async_soc_to_cluster_axi_bus.b_wptr ), + .async_data_slave_b_rptr_i ( async_soc_to_cluster_axi_bus.b_rptr ), + .async_data_slave_b_data_o ( 
async_soc_to_cluster_axi_bus.b_data ) + ); + end + endgenerate AXI_BUS #( .AXI_ADDR_WIDTH( AxiAw ), @@ -172,14 +499,6 @@ module pulp_cluster_tb; .AXI_USER_WIDTH ( AxiUw ), .LOG_DEPTH ( 3 ) ) async_cluster_to_soc_axi_bus(); - - AXI_BUS_ASYNC_GRAY #( - .AXI_ADDR_WIDTH ( AxiAw ), - .AXI_DATA_WIDTH ( DmaAxiDw ), - .AXI_ID_WIDTH ( DmaAxiIw ), - .AXI_USER_WIDTH ( AxiUw ), - .LOG_DEPTH ( 3 ) - ) async_dma_axi_bus(); // Behavioural slaves axi_m_req_t axi_memreq; @@ -188,38 +507,6 @@ module pulp_cluster_tb; `AXI_ASSIGN_TO_REQ(axi_memreq, axi_master[1]) `AXI_ASSIGN_FROM_RESP(axi_master[1], axi_memrsp) - axi_dw_converter_intf #( - .AXI_ID_WIDTH ( AxiIw ), - .AXI_ADDR_WIDTH ( AxiAw ), - .AXI_SLV_PORT_DATA_WIDTH ( DmaAxiDw ), - .AXI_MST_PORT_DATA_WIDTH ( AxiDw ), - .AXI_USER_WIDTH ( AxiUw ), - .AXI_MAX_READS ( 3 ) - ) i_dma_dw_conv ( - .clk_i ( s_clk ), - .rst_ni ( s_rstn ), - .slv ( dma_slave_iw ), - .mst ( axi_slave[2] ) - ); - - axi_iw_converter_intf #( - .AXI_SLV_PORT_ID_WIDTH ( DmaAxiIw ), - .AXI_MST_PORT_ID_WIDTH ( AxiIw ), - .AXI_SLV_PORT_MAX_UNIQ_IDS ( 5 ), - .AXI_SLV_PORT_MAX_TXNS_PER_ID ( 5 ), - .AXI_SLV_PORT_MAX_TXNS ( 5 ), - .AXI_MST_PORT_MAX_UNIQ_IDS ( 5 ), - .AXI_MST_PORT_MAX_TXNS_PER_ID ( 5 ), - .AXI_ADDR_WIDTH ( AxiAw ), - .AXI_DATA_WIDTH ( DmaAxiDw ), - .AXI_USER_WIDTH ( AxiUw ) - ) i_dma_iw_conv ( - .clk_i ( s_clk ), - .rst_ni ( s_rstn ), - .slv ( dma_slave ), - .mst ( dma_slave_iw ) - ); - axi_sim_mem #( .AddrWidth ( AxiAw ), .DataWidth ( AxiDw ), @@ -362,176 +649,6 @@ module pulp_cluster_tb; .dst ( axi_slave[1] ) ); - axi_cdc_dst_intf #( - .AXI_ADDR_WIDTH ( AxiAw ), - .AXI_DATA_WIDTH ( DmaAxiDw ), - .AXI_ID_WIDTH ( DmaAxiIw ), - .AXI_USER_WIDTH ( AxiUw ), - .LOG_DEPTH ( 3 ) - ) cluster_to_soc_dma_dst_cdc_fifo_i ( - .dst_clk_i ( s_clk ), - .dst_rst_ni ( s_rstn ), - .src ( async_dma_axi_bus ), - .dst ( dma_slave ) - ); - - localparam pulp_cluster_cfg_t PulpClusterCfg = '{ - CoreType: pulp_cluster_package::RI5CY, - NumCores: `NB_CORES, - DmaNumPlugs: `NB_DMAS, 
- DmaNumOutstandingBursts: 8, - DmaBurstLength: 256, - DmaUseHwpePort: 1, - NumMstPeriphs: `NB_MPERIPHS, - NumSlvPeriphs: `NB_SPERIPHS, - ClusterAlias: 1, - ClusterAliasBase: 'h0, - NumSyncStages: 3, - UseHci: 1, - TcdmSize: 128*1024, - TcdmNumBank: 16, - HwpePresent: 1, - HwpeCfg: '{NumHwpes: 1, HwpeList: {NEUREKA}}, - HwpeNumPorts: 9, - HMRPresent: 1, - HMRDmrEnabled: 1, - HMRTmrEnabled: 1, - HMRDmrFIxed: 0, - HMRTmrFIxed: 0, - HMRInterleaveGrps: 1, - HMREnableRapidRecovery: 1, - HMRSeparateDataVoters: 1, - HMRSeparateAxiBus: 0, - HMRNumBusVoters: 1, - EnableECC: 0, - ECCInterco: 0, - iCacheNumBanks: 2, - iCacheNumLines: 1, - iCacheNumWays: 4, - iCacheSharedSize: 4*1024, - iCachePrivateSize: 512, - iCachePrivateDataWidth: 32, - EnableReducedTag: 1, - L2Size: 1000*1024, - DmBaseAddr: 'h60203000, - BootRomBaseAddr: BootAddr, - BootAddr: BootAddr, - EnablePrivateFpu: 1, - EnablePrivateFpDivSqrt: 0, - NumAxiIn: NumAxiSubordinatePorts, - NumAxiOut: NumAxiManagerPorts, - AxiIdInWidth: AxiIw-2, - AxiIdOutWidth: AxiIw, - AxiIdOutWideWidth: 1, - AxiAddrWidth: AxiAw, - AxiDataInWidth: AxiDw, - AxiDataOutWidth: AxiDw, - AxiDataOutWideWidth: DmaAxiDw, - AxiUserWidth: AxiUw, - AxiMaxInTrans: 64, - AxiMaxOutTrans: 64, - AxiCdcLogDepth: 3, - AxiCdcSyncStages: 3, - SyncStages: 3, - ClusterBaseAddr: ClustBaseAddr, - ClusterPeriphOffs: ClustPeriphOffs, - ClusterExternalOffs: ClustExtOffs, - EnableRemapAddress: 0, - SnitchICache: 0, - default: '0 - }; - - pulp_cluster -`ifdef USE_PULP_PARAMETERS - #( - .Cfg ( PulpClusterCfg ) - ) -`endif - cluster_i ( - .clk_i ( s_clk ), - .rst_ni ( s_rstn ), - .pwr_on_rst_ni ( s_rstn ), - .ref_clk_i ( s_clk ), - .axi_isolate_i ( '0 ), - .axi_isolated_o ( ), - .axi_isolated_wide_o ( ), - - .pmu_mem_pwdn_i ( 1'b0 ), - - .base_addr_i ( ClustBase[31:28] ), - - .dma_pe_evt_ack_i ( '1 ), - .dma_pe_evt_valid_o ( ), - - .dma_pe_irq_ack_i ( 1'b1 ), - .dma_pe_irq_valid_o ( ), - - .dbg_irq_valid_i ( '0 ), - .mbox_irq_i ( '0 ), - - .pf_evt_ack_i ( 1'b1 ), - 
.pf_evt_valid_o ( ), - - .async_cluster_events_wptr_i ( '0 ), - .async_cluster_events_rptr_o ( ), - .async_cluster_events_data_i ( '0 ), - - .en_sa_boot_i ( s_cluster_en_sa_boot ), - .test_mode_i ( 1'b0 ), - .fetch_en_i ( s_cluster_fetch_en ), - .eoc_o ( s_cluster_eoc ), - .busy_o ( s_cluster_busy ), - .cluster_id_i ( ClustIdx ), - - .async_data_master_aw_wptr_o ( async_cluster_to_soc_axi_bus.aw_wptr ), - .async_data_master_aw_rptr_i ( async_cluster_to_soc_axi_bus.aw_rptr ), - .async_data_master_aw_data_o ( async_cluster_to_soc_axi_bus.aw_data ), - .async_data_master_ar_wptr_o ( async_cluster_to_soc_axi_bus.ar_wptr ), - .async_data_master_ar_rptr_i ( async_cluster_to_soc_axi_bus.ar_rptr ), - .async_data_master_ar_data_o ( async_cluster_to_soc_axi_bus.ar_data ), - .async_data_master_w_data_o ( async_cluster_to_soc_axi_bus.w_data ), - .async_data_master_w_wptr_o ( async_cluster_to_soc_axi_bus.w_wptr ), - .async_data_master_w_rptr_i ( async_cluster_to_soc_axi_bus.w_rptr ), - .async_data_master_r_wptr_i ( async_cluster_to_soc_axi_bus.r_wptr ), - .async_data_master_r_rptr_o ( async_cluster_to_soc_axi_bus.r_rptr ), - .async_data_master_r_data_i ( async_cluster_to_soc_axi_bus.r_data ), - .async_data_master_b_wptr_i ( async_cluster_to_soc_axi_bus.b_wptr ), - .async_data_master_b_rptr_o ( async_cluster_to_soc_axi_bus.b_rptr ), - .async_data_master_b_data_i ( async_cluster_to_soc_axi_bus.b_data ), - - .async_wide_master_aw_wptr_o ( async_dma_axi_bus.aw_wptr ), - .async_wide_master_aw_rptr_i ( async_dma_axi_bus.aw_rptr ), - .async_wide_master_aw_data_o ( async_dma_axi_bus.aw_data ), - .async_wide_master_ar_wptr_o ( async_dma_axi_bus.ar_wptr ), - .async_wide_master_ar_rptr_i ( async_dma_axi_bus.ar_rptr ), - .async_wide_master_ar_data_o ( async_dma_axi_bus.ar_data ), - .async_wide_master_w_data_o ( async_dma_axi_bus.w_data ), - .async_wide_master_w_wptr_o ( async_dma_axi_bus.w_wptr ), - .async_wide_master_w_rptr_i ( async_dma_axi_bus.w_rptr ), - .async_wide_master_r_wptr_i ( 
async_dma_axi_bus.r_wptr ), - .async_wide_master_r_rptr_o ( async_dma_axi_bus.r_rptr ), - .async_wide_master_r_data_i ( async_dma_axi_bus.r_data ), - .async_wide_master_b_wptr_i ( async_dma_axi_bus.b_wptr ), - .async_wide_master_b_rptr_o ( async_dma_axi_bus.b_rptr ), - .async_wide_master_b_data_i ( async_dma_axi_bus.b_data ), - - .async_data_slave_aw_wptr_i ( async_soc_to_cluster_axi_bus.aw_wptr ), - .async_data_slave_aw_rptr_o ( async_soc_to_cluster_axi_bus.aw_rptr ), - .async_data_slave_aw_data_i ( async_soc_to_cluster_axi_bus.aw_data ), - .async_data_slave_ar_wptr_i ( async_soc_to_cluster_axi_bus.ar_wptr ), - .async_data_slave_ar_rptr_o ( async_soc_to_cluster_axi_bus.ar_rptr ), - .async_data_slave_ar_data_i ( async_soc_to_cluster_axi_bus.ar_data ), - .async_data_slave_w_data_i ( async_soc_to_cluster_axi_bus.w_data ), - .async_data_slave_w_wptr_i ( async_soc_to_cluster_axi_bus.w_wptr ), - .async_data_slave_w_rptr_o ( async_soc_to_cluster_axi_bus.w_rptr ), - .async_data_slave_r_wptr_o ( async_soc_to_cluster_axi_bus.r_wptr ), - .async_data_slave_r_rptr_i ( async_soc_to_cluster_axi_bus.r_rptr ), - .async_data_slave_r_data_o ( async_soc_to_cluster_axi_bus.r_data ), - .async_data_slave_b_wptr_o ( async_soc_to_cluster_axi_bus.b_wptr ), - .async_data_slave_b_rptr_i ( async_soc_to_cluster_axi_bus.b_rptr ), - .async_data_slave_b_data_o ( async_soc_to_cluster_axi_bus.b_data ) - ); - // Load ELF binary file task load_binary; input string binary; // File name From ba4f9c8b27761c5ab4e9412a2e5ddc643a459713 Mon Sep 17 00:00:00 2001 From: Daniel Keller Date: Thu, 7 Aug 2025 17:26:39 +0200 Subject: [PATCH 35/67] propagate same variable for consistency Signed-off-by: Daniel Keller --- rtl/pulp_cluster.sv | 76 ++++++++++++++++++++++----------------------- 1 file changed, 38 insertions(+), 38 deletions(-) diff --git a/rtl/pulp_cluster.sv b/rtl/pulp_cluster.sv index 162d7886..54601515 100644 --- a/rtl/pulp_cluster.sv +++ b/rtl/pulp_cluster.sv @@ -343,13 +343,13 @@ logic s_dma_fc_irq; 
// - MCHAN: Always disable wide port (uses narrow port only) // - iDMA: Use Cfg.EnableWidePort parameter `ifdef TARGET_MCHAN - localparam bit WidePortShouldBeEnabled = 1'b0; // MCHAN never needs wide ports + localparam bit EnableWidePort = 1'b0; // MCHAN never needs wide ports `else - localparam bit WidePortShouldBeEnabled = Cfg.EnableWidePort; // User-configurable for iDMA + localparam bit EnableWidePort = Cfg.EnableWidePort; // User-configurable for iDMA `endif // Wide AXI infrastructure: Conditional implementation based on EnableWidePort -// - MCHAN: Always uses narrow transfers (WidePortShouldBeEnabled = 0) +// - MCHAN: Always uses narrow transfers (EnableWidePort = 0) // - iDMA with EnableWidePort=1: Uses wide transfers (256-bit AXI) // - iDMA with EnableWidePort=0: Uses narrow transfers (64-bit AXI) // - Wide infrastructure present for interface compatibility @@ -806,7 +806,7 @@ cluster_interconnect_wrap #( //*************************************************** //*********************DMAC WRAP********************* //*************************************************** -if (WidePortShouldBeEnabled) begin : gen_wide_port_idma +if (EnableWidePort) begin : gen_wide_port_idma dmac_wrap #( .NB_CORES ( Cfg.NumCores ), .NB_OUTSND_BURSTS ( Cfg.DmaNumOutstandingBursts ), @@ -1732,7 +1732,7 @@ c2s_remap_req_t src_remap_req; c2s_remap_resp_t src_remap_resp; // Connect DMA narrow master when wide port disabled, otherwise cluster bus master -if (WidePortShouldBeEnabled) begin : gen_cluster_bus_narrow_master +if (EnableWidePort) begin : gen_cluster_bus_narrow_master `AXI_ASSIGN_REQ_STRUCT(src_remap_req, s_data_master_req) `AXI_ASSIGN_RESP_STRUCT(s_data_master_resp, src_remap_resp) end else begin : gen_dma_narrow_master @@ -1850,48 +1850,48 @@ axi_isolate #( ); axi_cdc_src #( - .aw_chan_t ( c2s_aw_chan_t ), - .w_chan_t ( c2s_w_chan_t ), - .b_chan_t ( c2s_b_chan_t ), - .r_chan_t ( c2s_r_chan_t ), - .ar_chan_t ( c2s_ar_chan_t ), - .axi_req_t ( c2s_req_t ), - .axi_resp_t ( 
c2s_resp_t ), - .LogDepth ( Cfg.AxiCdcLogDepth ), - .SyncStages ( Cfg.AxiCdcSyncStages ) + .aw_chan_t ( c2s_aw_chan_t ), + .w_chan_t ( c2s_w_chan_t ), + .b_chan_t ( c2s_b_chan_t ), + .r_chan_t ( c2s_r_chan_t ), + .ar_chan_t ( c2s_ar_chan_t ), + .axi_req_t ( c2s_req_t ), + .axi_resp_t ( c2s_resp_t ), + .LogDepth ( Cfg.AxiCdcLogDepth ), + .SyncStages ( Cfg.AxiCdcSyncStages ) ) axi_master_cdc_i ( - .src_rst_ni ( pwr_on_rst_ni ), - .src_clk_i ( clk_i ), - .src_req_i ( src_req ), - .src_resp_o ( src_resp ), - .async_data_master_aw_wptr_o ( async_data_master_aw_wptr_o ), - .async_data_master_aw_rptr_i ( async_data_master_aw_rptr_i ), - .async_data_master_aw_data_o ( async_data_master_aw_data_o ), - .async_data_master_w_wptr_o ( async_data_master_w_wptr_o ), - .async_data_master_w_rptr_i ( async_data_master_w_rptr_i ), - .async_data_master_w_data_o ( async_data_master_w_data_o ), - .async_data_master_ar_wptr_o ( async_data_master_ar_wptr_o ), - .async_data_master_ar_rptr_i ( async_data_master_ar_rptr_i ), - .async_data_master_ar_data_o ( async_data_master_ar_data_o ), - .async_data_master_b_wptr_i ( async_data_master_b_wptr_i ), - .async_data_master_b_rptr_o ( async_data_master_b_rptr_o ), - .async_data_master_b_data_i ( async_data_master_b_data_i ), - .async_data_master_r_wptr_i ( async_data_master_r_wptr_i ), - .async_data_master_r_rptr_o ( async_data_master_r_rptr_o ), - .async_data_master_r_data_i ( async_data_master_r_data_i ) + .src_rst_ni ( pwr_on_rst_ni ), + .src_clk_i ( clk_i ), + .src_req_i ( src_req ), + .src_resp_o ( src_resp ), + .async_data_master_aw_wptr_o ( async_data_master_aw_wptr_o ), + .async_data_master_aw_rptr_i ( async_data_master_aw_rptr_i ), + .async_data_master_aw_data_o ( async_data_master_aw_data_o ), + .async_data_master_w_wptr_o ( async_data_master_w_wptr_o ), + .async_data_master_w_rptr_i ( async_data_master_w_rptr_i ), + .async_data_master_w_data_o ( async_data_master_w_data_o ), + .async_data_master_ar_wptr_o ( async_data_master_ar_wptr_o 
), + .async_data_master_ar_rptr_i ( async_data_master_ar_rptr_i ), + .async_data_master_ar_data_o ( async_data_master_ar_data_o ), + .async_data_master_b_wptr_i ( async_data_master_b_wptr_i ), + .async_data_master_b_rptr_o ( async_data_master_b_rptr_o ), + .async_data_master_b_data_i ( async_data_master_b_data_i ), + .async_data_master_r_wptr_i ( async_data_master_r_wptr_i ), + .async_data_master_r_rptr_o ( async_data_master_r_rptr_o ), + .async_data_master_r_data_i ( async_data_master_r_data_i ) ); // Cluster to Soc (wide) c2s_wide_req_t src_wide_req, isolate_src_wide_req; c2s_wide_resp_t src_wide_resp, isolate_src_wide_resp; -// Route DMA master request/response based on WidePortShouldBeEnabled -assign isolate_src_wide_req = WidePortShouldBeEnabled ? s_dma_master_req : s_dma_narrow_master_req; -assign s_dma_master_resp = WidePortShouldBeEnabled ? isolate_src_wide_resp : s_dma_narrow_master_resp; +// Route DMA master request/response based on EnableWidePort +assign isolate_src_wide_req = EnableWidePort ? s_dma_master_req : s_dma_narrow_master_req; +assign s_dma_master_resp = EnableWidePort ? 
isolate_src_wide_resp : s_dma_narrow_master_resp; // Instantiate wide port isolation and CDC only when enabled generate - if (WidePortShouldBeEnabled) begin : gen_wide_port + if (EnableWidePort) begin : gen_wide_port axi_isolate #( .NumPending ( 8 ), .TerminateTransaction ( 1 ), @@ -2110,7 +2110,7 @@ initial begin : p_assert else $fatal(1, "When using MCHAN, Cfg.DmaNumPlugs must be 4!"); assert(!Cfg.DmaUseHwpePort) else $fatal(1, "When using MCHAN, Cfg.DmaUseHwpePort must be 0!"); - assert(!WidePortShouldBeEnabled) + assert(!EnableWidePort) else $fatal(1, "When using MCHAN, wide port should be disabled!"); `else if (!Cfg.DmaUseHwpePort) begin From 873884d6580a010b2af804390952847b712f5910 Mon Sep 17 00:00:00 2001 From: Daniel Keller Date: Thu, 7 Aug 2025 18:07:41 +0200 Subject: [PATCH 36/67] build different tests for using idma and mchan Signed-off-by: Daniel Keller --- bender-common.mk | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/bender-common.mk b/bender-common.mk index 8a1a4a8b..4297d044 100644 --- a/bender-common.mk +++ b/bender-common.mk @@ -11,7 +11,16 @@ common_defs += -D CLUSTER_ALIAS common_defs += -D USE_PULP_PARAMETERS common_targs += -t rtl -# common_targs += -t mchan +# DMA configuration +DMA_TYPE ?= idma + +ifeq ($(DMA_TYPE),mchan) + common_targs += -t mchan + common_defs += -D TARGET_MCHAN +else + # Default to iDMA +endif + common_targs += -t cluster_standalone common_targs += -t scm_use_fpga_scm common_targs += -t cv32e40p_use_ff_regfile \ No newline at end of file From f8944c1bbeb1bb9424ddb6a4fc2ed629f56887d1 Mon Sep 17 00:00:00 2001 From: Daniel Keller Date: Thu, 7 Aug 2025 21:51:35 +0200 Subject: [PATCH 37/67] trigger job Signed-off-by: Daniel Keller --- tb/pulp_cluster_tb.sv | 1 + 1 file changed, 1 insertion(+) diff --git a/tb/pulp_cluster_tb.sv b/tb/pulp_cluster_tb.sv index 62727a28..2a9fe013 100644 --- a/tb/pulp_cluster_tb.sv +++ b/tb/pulp_cluster_tb.sv @@ -753,6 +753,7 @@ module pulp_cluster_tb; w_beat.w_strb = 
'1; w_beat.w_last = '1; + axi_master_drv.send_aw(aw_beat); axi_master_drv.send_w(w_beat); @(posedge s_clk); From 51b3a158d24a0ecfa1da0abaf4188227bd0b2140 Mon Sep 17 00:00:00 2001 From: Daniel Keller Date: Fri, 8 Aug 2025 10:57:11 +0200 Subject: [PATCH 38/67] display TB configuration Signed-off-by: Daniel Keller --- tb/pulp_cluster_tb.sv | 2 ++ 1 file changed, 2 insertions(+) diff --git a/tb/pulp_cluster_tb.sv b/tb/pulp_cluster_tb.sv index 2a9fe013..ae3a0e0e 100644 --- a/tb/pulp_cluster_tb.sv +++ b/tb/pulp_cluster_tb.sv @@ -211,6 +211,7 @@ module pulp_cluster_tb; generate if (EnableWidePort) begin : gen_dma_buses + $display("[TB] Instantiting cluster with wide ports\n"); AXI_BUS #( .AXI_ADDR_WIDTH( AxiAw ), .AXI_DATA_WIDTH( DmaAxiDw ), @@ -385,6 +386,7 @@ module pulp_cluster_tb; logic async_dma_axi_bus_b_rptr; logic async_dma_axi_bus_b_data; + $display("[TB] Instantiting cluster with narrow ports\n"); pulp_cluster `ifdef USE_PULP_PARAMETERS #( .Cfg ( PulpClusterCfg ) ) From 6b0d98bcec97372def4088395ad1eab1f9a2347a Mon Sep 17 00:00:00 2001 From: Daniel Keller Date: Fri, 8 Aug 2025 11:08:38 +0200 Subject: [PATCH 39/67] fix: move display to initial block Signed-off-by: Daniel Keller --- tb/pulp_cluster_tb.sv | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/tb/pulp_cluster_tb.sv b/tb/pulp_cluster_tb.sv index ae3a0e0e..d9d3dd51 100644 --- a/tb/pulp_cluster_tb.sv +++ b/tb/pulp_cluster_tb.sv @@ -209,9 +209,17 @@ module pulp_cluster_tb; default: '0 }; + initial begin + if (EnableWidePort) begin + $display("[TB] Instantiating cluster with wide ports"); + end else begin + $display("[TB] Instantiating cluster with narrow ports"); + end + end + generate if (EnableWidePort) begin : gen_dma_buses - $display("[TB] Instantiting cluster with wide ports\n"); + // $display("[TB] Instantiting cluster with wide ports\n"); AXI_BUS #( .AXI_ADDR_WIDTH( AxiAw ), .AXI_DATA_WIDTH( DmaAxiDw ), @@ -386,7 +394,7 @@ module pulp_cluster_tb; logic 
async_dma_axi_bus_b_rptr; logic async_dma_axi_bus_b_data; - $display("[TB] Instantiting cluster with narrow ports\n"); + // $display("[TB] Instantiting cluster with narrow ports\n"); pulp_cluster `ifdef USE_PULP_PARAMETERS #( .Cfg ( PulpClusterCfg ) ) From 6ef07a2610c093b4158bffbee0505b6aa1cb81cb Mon Sep 17 00:00:00 2001 From: Daniel Keller Date: Fri, 8 Aug 2025 14:08:12 +0200 Subject: [PATCH 40/67] [wip] add support for wide DMA port toggle in Makefile Signed-off-by: Daniel Keller --- Makefile | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/Makefile b/Makefile index c1fd2776..65edf697 100644 --- a/Makefile +++ b/Makefile @@ -27,6 +27,12 @@ REGRESSIONS := $(ROOT_DIR)/regression_tests VLOG_ARGS += -suppress vlog-2583 -suppress vlog-13314 -suppress vlog-13233 -timescale \"1 ns / 1 ps\" \"+incdir+$(shell pwd)/include\" +# TB's wide DMA port toggle override +TB_ENABLE_WIDE_PORT ?= 1 +ifneq ($(strip $(TB_ENABLE_WIDE_PORT)),) +VLOG_ARGS += +define+TB_ENABLE_WIDE_PORT=$(TB_ENABLE_WIDE_PORT) +endif + define generate_vsim echo 'set ROOT [file normalize [file dirname [info script]]/$3]' > $1 $(BENDER) script vsim --vlog-arg="$(VLOG_ARGS)" $2 | grep -v "set ROOT" >> $1 From 04aef50288d2277943e2045c43d6d88299a22315 Mon Sep 17 00:00:00 2001 From: Daniel Keller Date: Fri, 8 Aug 2025 14:31:35 +0200 Subject: [PATCH 41/67] trigger job Signed-off-by: Daniel Keller --- tb/pulp_cluster_tb.sv | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tb/pulp_cluster_tb.sv b/tb/pulp_cluster_tb.sv index d9d3dd51..83b55ce2 100644 --- a/tb/pulp_cluster_tb.sv +++ b/tb/pulp_cluster_tb.sv @@ -24,7 +24,7 @@ import "DPI-C" function read_elf(input string filename); import "DPI-C" function byte get_section(output longint address, output longint len); import "DPI-C" context function byte read_section(input longint address, inout byte buffer[], input longint len); -// PULP Cluster flavors +// PULP Cluster flavors override `ifndef TB_ENABLE_WIDE_PORT `define TB_ENABLE_WIDE_PORT 1 
`endif From 54c324740248178f7dc6f56488924f772cc61623 Mon Sep 17 00:00:00 2001 From: Daniel Keller Date: Fri, 8 Aug 2025 14:46:28 +0200 Subject: [PATCH 42/67] remove nonfree artifacts; trigger job Signed-off-by: Daniel Keller --- Makefile | 1 + 1 file changed, 1 insertion(+) diff --git a/Makefile b/Makefile index 65edf697..6e722e05 100644 --- a/Makefile +++ b/Makefile @@ -48,6 +48,7 @@ NONFREE_REMOTE ?= git@iis-git.ee.ethz.ch:pulp-restricted/pulp-cluster-nonfree.gi NONFREE_BRANCH ?= dkeller/chimera-v2 nonfree-init: + rm -rf nonfree; git clone --single-branch --branch $(NONFREE_BRANCH) $(NONFREE_REMOTE) nonfree cd nonfree && git checkout $(NONFREE_COMMIT) From d14fd7d6c2435faef013e40c86af48795e32abb7 Mon Sep 17 00:00:00 2001 From: Daniel Keller Date: Fri, 8 Aug 2025 15:06:51 +0200 Subject: [PATCH 43/67] trigger job --- Makefile | 1 + 1 file changed, 1 insertion(+) diff --git a/Makefile b/Makefile index 6e722e05..6d17e449 100644 --- a/Makefile +++ b/Makefile @@ -12,6 +12,7 @@ else QUESTA ?= endif + BENDER ?= bender PYTHON ?= python3 From 611c337e19082e488558e628339fd06a75db2708 Mon Sep 17 00:00:00 2001 From: Daniel Keller Date: Fri, 8 Aug 2025 15:38:47 +0200 Subject: [PATCH 44/67] unset default Signed-off-by: Daniel Keller --- Makefile | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 6d17e449..323e1d20 100644 --- a/Makefile +++ b/Makefile @@ -29,7 +29,8 @@ REGRESSIONS := $(ROOT_DIR)/regression_tests VLOG_ARGS += -suppress vlog-2583 -suppress vlog-13314 -suppress vlog-13233 -timescale \"1 ns / 1 ps\" \"+incdir+$(shell pwd)/include\" # TB's wide DMA port toggle override -TB_ENABLE_WIDE_PORT ?= 1 + +export TB_ENABLE_WIDE_PORT ifneq ($(strip $(TB_ENABLE_WIDE_PORT)),) VLOG_ARGS += +define+TB_ENABLE_WIDE_PORT=$(TB_ENABLE_WIDE_PORT) endif From cf00859f61756289b94d222a595fd44c1df4074c Mon Sep 17 00:00:00 2001 From: Daniel Keller Date: Fri, 8 Aug 2025 15:44:48 +0200 Subject: [PATCH 45/67] avoid removing Bender.lock Signed-off-by: 
Daniel Keller --- Makefile | 2 -- 1 file changed, 2 deletions(-) diff --git a/Makefile b/Makefile index 323e1d20..eefdc5b5 100644 --- a/Makefile +++ b/Makefile @@ -29,7 +29,6 @@ REGRESSIONS := $(ROOT_DIR)/regression_tests VLOG_ARGS += -suppress vlog-2583 -suppress vlog-13314 -suppress vlog-13233 -timescale \"1 ns / 1 ps\" \"+incdir+$(shell pwd)/include\" # TB's wide DMA port toggle override - export TB_ENABLE_WIDE_PORT ifneq ($(strip $(TB_ENABLE_WIDE_PORT)),) VLOG_ARGS += +define+TB_ENABLE_WIDE_PORT=$(TB_ENABLE_WIDE_PORT) @@ -173,7 +172,6 @@ run: .PHONY: clean clean: - rm -rf Bender.lock rm -rf scripts/synth-compile.tcl rm -rf scripts/compile.tcl rm -rf transcript From 6bc47070592c6949e0e7c6979ab9c3c3606657fc Mon Sep 17 00:00:00 2001 From: Daniel Keller Date: Fri, 8 Aug 2025 16:56:14 +0200 Subject: [PATCH 46/67] include legacy MCHAN configuration Signed-off-by: Daniel Keller --- include/pulp_soc_defines.sv | 13 +++++++++++-- rtl/pulp_cluster.sv | 9 ++------- tb/pulp_cluster_tb.sv | 2 +- 3 files changed, 14 insertions(+), 10 deletions(-) diff --git a/include/pulp_soc_defines.sv b/include/pulp_soc_defines.sv index f9602f1c..f8470b45 100644 --- a/include/pulp_soc_defines.sv +++ b/include/pulp_soc_defines.sv @@ -43,12 +43,21 @@ `define CLUST_FP_DIVSQRT 1 `define CLUST_SHARED_FP 2 `define CLUST_SHARED_FP_DIVSQRT 2 -`endif +`endif //PARAMETRES `define NB_CLUSTERS 1 `define NB_CORES 8 -`define NB_DMAS 2 +`define NB_DMAS 4 `define NB_EXT 4 `define NB_MPERIPHS 1 `define NB_SPERIPHS 12 + + +`ifdef TARGET_MCHAN + `define NB_DMAS 4 + `define DMA_USE_HWPE_PORT 0 +`else + `define NB_DMAS 2 + `define DMA_USE_HWPE_PORT 1 +`endif diff --git a/rtl/pulp_cluster.sv b/rtl/pulp_cluster.sv index 54601515..0807f9ef 100644 --- a/rtl/pulp_cluster.sv +++ b/rtl/pulp_cluster.sv @@ -883,13 +883,8 @@ end else begin : gen_narrow_port_idma .pe_ctrl_slave ( s_periph_dma_bus[1:0] ), .ctrl_slave ( s_core_dmactrl_bus ), .tcdm_master ( s_hci_dma ), - `ifdef TARGET_MCHAN - .ext_master_req_o ( /* MCHAN 
uses narrow port - not connected to wide */ ), - .ext_master_resp_i ( '0 ), - `else - .ext_master_req_o ( {s_dma_narrow_master_req} ), - .ext_master_resp_i ( {s_dma_narrow_master_resp} ), - `endif + .ext_master_req_o ( {s_dma_narrow_master_req} ), + .ext_master_resp_i ( {s_dma_narrow_master_resp} ), .term_event_o ( s_dma_event ), .term_irq_o ( s_dma_irq ), .term_event_pe_o ( {s_dma_fc_event, s_dma_cl_event} ), diff --git a/tb/pulp_cluster_tb.sv b/tb/pulp_cluster_tb.sv index 83b55ce2..677af13d 100644 --- a/tb/pulp_cluster_tb.sv +++ b/tb/pulp_cluster_tb.sv @@ -148,7 +148,7 @@ module pulp_cluster_tb; DmaNumPlugs: `NB_DMAS, DmaNumOutstandingBursts: 8, DmaBurstLength: 256, - DmaUseHwpePort: 1, + DmaUseHwpePort: `DMA_USE_HWPE_PORT, NumMstPeriphs: `NB_MPERIPHS, NumSlvPeriphs: `NB_SPERIPHS, ClusterAlias: 1, From f19b6a831a26609206456bf183a33369e044f9a8 Mon Sep 17 00:00:00 2001 From: Daniel Keller Date: Fri, 8 Aug 2025 18:12:31 +0200 Subject: [PATCH 47/67] trigger CI job Signed-off-by: Daniel Keller --- Makefile | 1 + 1 file changed, 1 insertion(+) diff --git a/Makefile b/Makefile index eefdc5b5..f5b44fa4 100644 --- a/Makefile +++ b/Makefile @@ -176,6 +176,7 @@ clean: rm -rf scripts/compile.tcl rm -rf transcript rm -rf *.log + #################### # Regression tests # #################### From 2c432888d42c23bdf53dee6f2d77b4bef3fb390a Mon Sep 17 00:00:00 2001 From: Daniel Keller Date: Fri, 8 Aug 2025 18:35:17 +0200 Subject: [PATCH 48/67] trigger CI job Signed-off-by: Daniel Keller --- Makefile | 1 - 1 file changed, 1 deletion(-) diff --git a/Makefile b/Makefile index f5b44fa4..8796af4a 100644 --- a/Makefile +++ b/Makefile @@ -180,5 +180,4 @@ clean: #################### # Regression tests # #################### - include regression.mk From 45938b2f1adfcc286ce3d081fc6cda5bcf607bc6 Mon Sep 17 00:00:00 2001 From: Daniel Keller Date: Mon, 11 Aug 2025 09:40:07 +0200 Subject: [PATCH 49/67] refactor: replace EnableWidePort with Cfg.EnableWidePort for consistency in DMA configuration 
Signed-off-by: Daniel Keller --- rtl/pulp_cluster.sv | 22 +++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/rtl/pulp_cluster.sv b/rtl/pulp_cluster.sv index 0807f9ef..b16a73d1 100644 --- a/rtl/pulp_cluster.sv +++ b/rtl/pulp_cluster.sv @@ -342,11 +342,11 @@ logic s_dma_fc_irq; // Determine if wide AXI port should be enabled based on DMA type and configuration // - MCHAN: Always disable wide port (uses narrow port only) // - iDMA: Use Cfg.EnableWidePort parameter -`ifdef TARGET_MCHAN - localparam bit EnableWidePort = 1'b0; // MCHAN never needs wide ports -`else - localparam bit EnableWidePort = Cfg.EnableWidePort; // User-configurable for iDMA -`endif +// `ifdef TARGET_MCHAN +// localparam bit EnableWidePort = 1'b0; // MCHAN never needs wide ports +// `else +// localparam bit EnableWidePort = Cfg.EnableWidePort; // User-configurable for iDMA +// `endif // Wide AXI infrastructure: Conditional implementation based on EnableWidePort // - MCHAN: Always uses narrow transfers (EnableWidePort = 0) @@ -806,7 +806,7 @@ cluster_interconnect_wrap #( //*************************************************** //*********************DMAC WRAP********************* //*************************************************** -if (EnableWidePort) begin : gen_wide_port_idma +if (Cfg.EnableWidePort) begin : gen_wide_port_idma dmac_wrap #( .NB_CORES ( Cfg.NumCores ), .NB_OUTSND_BURSTS ( Cfg.DmaNumOutstandingBursts ), @@ -1727,7 +1727,7 @@ c2s_remap_req_t src_remap_req; c2s_remap_resp_t src_remap_resp; // Connect DMA narrow master when wide port disabled, otherwise cluster bus master -if (EnableWidePort) begin : gen_cluster_bus_narrow_master +if (Cfg.EnableWidePort) begin : gen_cluster_bus_narrow_master `AXI_ASSIGN_REQ_STRUCT(src_remap_req, s_data_master_req) `AXI_ASSIGN_RESP_STRUCT(s_data_master_resp, src_remap_resp) end else begin : gen_dma_narrow_master @@ -1881,12 +1881,12 @@ c2s_wide_req_t src_wide_req, isolate_src_wide_req; c2s_wide_resp_t src_wide_resp, 
isolate_src_wide_resp; // Route DMA master request/response based on EnableWidePort -assign isolate_src_wide_req = EnableWidePort ? s_dma_master_req : s_dma_narrow_master_req; -assign s_dma_master_resp = EnableWidePort ? isolate_src_wide_resp : s_dma_narrow_master_resp; +assign isolate_src_wide_req = Cfg.EnableWidePort ? s_dma_master_req : s_dma_narrow_master_req; +assign s_dma_master_resp = Cfg.EnableWidePort ? isolate_src_wide_resp : s_dma_narrow_master_resp; // Instantiate wide port isolation and CDC only when enabled generate - if (EnableWidePort) begin : gen_wide_port + if (Cfg.EnableWidePort) begin : gen_wide_port axi_isolate #( .NumPending ( 8 ), .TerminateTransaction ( 1 ), @@ -2105,7 +2105,7 @@ initial begin : p_assert else $fatal(1, "When using MCHAN, Cfg.DmaNumPlugs must be 4!"); assert(!Cfg.DmaUseHwpePort) else $fatal(1, "When using MCHAN, Cfg.DmaUseHwpePort must be 0!"); - assert(!EnableWidePort) + assert(!Cfg.EnableWidePort) else $fatal(1, "When using MCHAN, wide port should be disabled!"); `else if (!Cfg.DmaUseHwpePort) begin From 56c173bcd0a7aa051cc01aaa168d4ab1068f2cfd Mon Sep 17 00:00:00 2001 From: Daniel Keller Date: Mon, 11 Aug 2025 09:45:23 +0200 Subject: [PATCH 50/67] refactor: NB_DMAS no longer overwritten; use macro defined DMA_USE_HWPE_PORT also in the package as reference Signed-off-by: Daniel Keller --- include/pulp_soc_defines.sv | 2 -- packages/pulp_cluster_package.sv | 5 +---- 2 files changed, 1 insertion(+), 6 deletions(-) diff --git a/include/pulp_soc_defines.sv b/include/pulp_soc_defines.sv index f8470b45..eda1b588 100644 --- a/include/pulp_soc_defines.sv +++ b/include/pulp_soc_defines.sv @@ -48,12 +48,10 @@ //PARAMETRES `define NB_CLUSTERS 1 `define NB_CORES 8 -`define NB_DMAS 4 `define NB_EXT 4 `define NB_MPERIPHS 1 `define NB_SPERIPHS 12 - `ifdef TARGET_MCHAN `define NB_DMAS 4 `define DMA_USE_HWPE_PORT 0 diff --git a/packages/pulp_cluster_package.sv b/packages/pulp_cluster_package.sv index 439bf703..41563a12 100644 --- 
a/packages/pulp_cluster_package.sv +++ b/packages/pulp_cluster_package.sv @@ -179,9 +179,6 @@ package pulp_cluster_package; // Address remap for virtualization bit EnableRemapAddress; // Enable wide AXI master port for high-bandwidth DMA transfers - // When disabled (0): Wide AXI ports are tied off, DMA uses narrow transfers only - // When enabled (1): DMA can use wide AXI port for high-bandwidth transfers - // Automatically disabled when using MCHAN (TARGET_MCHAN defined) bit EnableWidePort; // Enable Snitch ICache bit SnitchICache; @@ -222,7 +219,7 @@ package pulp_cluster_package; DmaNumPlugs: NumDmas, DmaNumOutstandingBursts: 8, DmaBurstLength: 256, - DmaUseHwpePort: 0, + DmaUseHwpePort: `DMA_USE_HWPE_PORT, NumMstPeriphs: NB_MPERIPHS, NumSlvPeriphs: NB_SPERIPHS, ClusterAlias: 1, From 8eaafc37b321366606737b0514bb0a479f50ca30 Mon Sep 17 00:00:00 2001 From: Daniel Keller Date: Tue, 12 Aug 2025 09:17:24 +0200 Subject: [PATCH 51/67] align wave.tcl for gui with new TB; add visible package instantiation for Questa Signed-off-by: Daniel Keller --- scripts/wave.tcl | 60 ++++++++++++++++++++++++++++++------------- tb/pulp_cluster_tb.sv | 3 +++ 2 files changed, 45 insertions(+), 18 deletions(-) diff --git a/scripts/wave.tcl b/scripts/wave.tcl index 09f55650..594813fd 100644 --- a/scripts/wave.tcl +++ b/scripts/wave.tcl @@ -7,32 +7,56 @@ onerror {resume} quietly WaveActivateNextPane {} 0 -set CORE_TYPE [examine sim:/pulp_cluster_tb/PulpClusterCfg.CoreType] -set NUM_CORES [examine -radix dec sim:/pulp_cluster_tb/PulpClusterCfg.NumCores] -set HMR_PRESENT [examine sim:/pulp_cluster_tb/PulpClusterCfg.HMRPresent] -set HWPE_PRESENT [examine sim:/pulp_cluster_tb/PulpClusterCfg.HwpePresent] -set HWPE_LIST [examine sim:/pulp_cluster_tb/PulpClusterCfg.HwpeCfg.HwpeList] -set NUM_HWPE [examine -radix dec sim:/pulp_cluster_tb/PulpClusterCfg.HwpeCfg.NumHwpes] +set CORE_TYPE [examine -radix symbolic sim:/pulp_cluster_tb/PulpClusterCfgVis.CoreType] +set NUM_CORES [examine -radix dec 
sim:/pulp_cluster_tb/PulpClusterCfgVis.NumCores] +set HMR_PRESENT [examine -radix dec sim:/pulp_cluster_tb/PulpClusterCfgVis.HMRPresent] +set HWPE_PRESENT [examine -radix dec sim:/pulp_cluster_tb/PulpClusterCfgVis.HwpePresent] +set HWPE_LIST [examine sim:/pulp_cluster_tb/PulpClusterCfgVis.HwpeCfg.HwpeList] +set NUM_HWPE [examine -radix dec sim:/pulp_cluster_tb/PulpClusterCfgVis.HwpeCfg.NumHwpes] + +set ENABLE_WIDE [examine -radix dec sim:/pulp_cluster_tb/PulpClusterCfgVis.EnableWidePort] +if { $ENABLE_WIDE == 1 } { + set CLUSTER_PATH /pulp_cluster_tb/gen_dma_buses/cluster_i +} else { + set CLUSTER_PATH /pulp_cluster_tb/gen_dma_stubs/cluster_i +} # Cluster -add wave -noupdate -group cluster /pulp_cluster_tb/cluster_i/* -# HMR -add wave -noupdate -group hmr /pulp_cluster_tb/cluster_i/gen_hmr_unit/i_hmr_unit/* +add wave -noupdate -group cluster $CLUSTER_PATH/* + +# HMR (if present) +if { $HMR_PRESENT != 0 } { + set hmr_path "$CLUSTER_PATH/gen_hmr_unit/i_hmr_unit/*" + set hmr_matches [find signals $hmr_path] + if { [llength $hmr_matches] > 0 } { + add wave -noupdate -group hmr $hmr_path + } +} + # Peripherals -add wave -noupdate -group cluster_peripherals /pulp_cluster_tb/cluster_i/cluster_peripherals_i/* +add wave -noupdate -group cluster_peripherals $CLUSTER_PATH/cluster_peripherals_i/* # Control Unit -add wave -noupdate -group cluster_control_unit /pulp_cluster_tb/cluster_i/cluster_peripherals_i/cluster_control_unit_i/* -# HWPE Subsystem -add wave -noupdate -group hwpe_subsystem /pulp_cluster_tb/cluster_i/hwpe_gen/hwpe_subsystem_i/* +add wave -noupdate -group cluster_control_unit $CLUSTER_PATH/cluster_peripherals_i/cluster_control_unit_i/* +# HWPE Subsystem (if present) +if { $HWPE_PRESENT != 0 } { + add wave -noupdate -group hwpe_subsystem $CLUSTER_PATH/hwpe_gen/hwpe_subsystem_i/* +} for {set i 0} {$i < $NUM_HWPE} {incr i} { - #set HWPE_NAME [tolower $HWPE_LIST($i)] - set HWPE_NAME [string tolower [examine sim:/pulp_cluster_tb/PulpClusterCfg.HwpeCfg.HwpeList[$i]]] 
- add wave -noupdate -group hwpe_subsystem -group $HWPE_NAME /pulp_cluster_tb/cluster_i/hwpe_gen/hwpe_subsystem_i/gen_hwpe[$i]/gen_$HWPE_NAME/i_$HWPE_NAME/* + if { $HWPE_PRESENT != 0 } { + # Try for different HWPEs + foreach hwpe_kind {redmule neureka softex} { + set inst_path "$CLUSTER_PATH/hwpe_gen/hwpe_subsystem_i/gen_hwpe[$i]/gen_${hwpe_kind}/i_${hwpe_kind}/*" + set matches [find signals $inst_path] + if { [llength $matches] > 0 } { + add wave -noupdate -group hwpe_subsystem -group $hwpe_kind $inst_path + } + } + } } # Cores for {set i 0} {$i < $NUM_CORES} {incr i} { - add wave -noupdate -group Core[$i] -group core_region /pulp_cluster_tb/cluster_i/CORE[$i]/core_region_i/* - add wave -noupdate -group Core[$i] -group core_region -group core /pulp_cluster_tb/cluster_i/CORE[$i]/core_region_i/${CORE_TYPE}_CORE/${CORE_TYPE}_CORE/* + add wave -noupdate -group Core[$i] -group core_region $CLUSTER_PATH/CORE[$i]/core_region_i/* + add wave -noupdate -group Core[$i] -group core_region -group core $CLUSTER_PATH/CORE[$i]/core_region_i/${CORE_TYPE}_CORE/${CORE_TYPE}_CORE/* } configure wave -timelineunits ns diff --git a/tb/pulp_cluster_tb.sv b/tb/pulp_cluster_tb.sv index 677af13d..deeda835 100644 --- a/tb/pulp_cluster_tb.sv +++ b/tb/pulp_cluster_tb.sv @@ -209,6 +209,9 @@ module pulp_cluster_tb; default: '0 }; + // Questasim visible; keep true to avoid optimization + (* keep = "true" *) pulp_cluster_package::pulp_cluster_cfg_t PulpClusterCfgVis = PulpClusterCfg; + initial begin if (EnableWidePort) begin $display("[TB] Instantiating cluster with wide ports"); From 59590d3ad8cc05127f7b050220f536cebd98d3c5 Mon Sep 17 00:00:00 2001 From: Daniel Keller Date: Tue, 12 Aug 2025 09:42:25 +0200 Subject: [PATCH 52/67] clean up signals and comments Signed-off-by: Daniel Keller --- tb/pulp_cluster_tb.sv | 19 ------------------- 1 file changed, 19 deletions(-) diff --git a/tb/pulp_cluster_tb.sv b/tb/pulp_cluster_tb.sv index deeda835..7de4a796 100644 --- a/tb/pulp_cluster_tb.sv +++ 
b/tb/pulp_cluster_tb.sv @@ -222,7 +222,6 @@ module pulp_cluster_tb; generate if (EnableWidePort) begin : gen_dma_buses - // $display("[TB] Instantiting cluster with wide ports\n"); AXI_BUS #( .AXI_ADDR_WIDTH( AxiAw ), .AXI_DATA_WIDTH( DmaAxiDw ), @@ -380,24 +379,6 @@ module pulp_cluster_tb; ); end else begin : gen_dma_stubs - // Dummy wires with correct width for cluster port connections - logic async_dma_axi_bus_aw_wptr; - logic async_dma_axi_bus_aw_rptr; - logic async_dma_axi_bus_aw_data; - logic async_dma_axi_bus_ar_wptr; - logic async_dma_axi_bus_ar_rptr; - logic async_dma_axi_bus_ar_data; - logic async_dma_axi_bus_w_wptr; - logic async_dma_axi_bus_w_rptr; - logic async_dma_axi_bus_w_data; - logic async_dma_axi_bus_r_wptr; - logic async_dma_axi_bus_r_rptr; - logic async_dma_axi_bus_r_data; - logic async_dma_axi_bus_b_wptr; - logic async_dma_axi_bus_b_rptr; - logic async_dma_axi_bus_b_data; - - // $display("[TB] Instantiting cluster with narrow ports\n"); pulp_cluster `ifdef USE_PULP_PARAMETERS #( .Cfg ( PulpClusterCfg ) ) From 9c74eed2443d52c5e4f04af77e5a1d03a800eab6 Mon Sep 17 00:00:00 2001 From: Daniel Keller Date: Tue, 12 Aug 2025 09:46:53 +0200 Subject: [PATCH 53/67] trigger CI Signed-off-by: Daniel Keller --- tb/pulp_cluster_tb.sv | 1 + 1 file changed, 1 insertion(+) diff --git a/tb/pulp_cluster_tb.sv b/tb/pulp_cluster_tb.sv index 7de4a796..76618ddc 100644 --- a/tb/pulp_cluster_tb.sv +++ b/tb/pulp_cluster_tb.sv @@ -52,6 +52,7 @@ module pulp_cluster_tb; .rst_no ( s_rstn ) ); + localparam AxiAw = 32; localparam AxiDw = 64; localparam AxiIw = 6; From 49e72830b129e099c8b94841193d1dbf199862f8 Mon Sep 17 00:00:00 2001 From: Daniel Keller Date: Thu, 21 Aug 2025 09:15:33 +0200 Subject: [PATCH 54/67] wip: update deps in lock file; add HCI filter signal; fix waves to take hex; roll back to astral version Neureka; 3 HWPEs added Signed-off-by: Daniel Keller --- Bender.lock | 18 +++++++++--------- Bender.yml | 4 ++-- Makefile | 5 ++++- rtl/cluster_interconnect_wrap.sv 
| 4 +++- rtl/cluster_peripherals.sv | 1 + rtl/hwpe_subsystem.sv | 4 ++++ rtl/pulp_cluster.sv | 5 +++++ scripts/wave.tcl | 6 +++--- tb/pulp_cluster_tb.sv | 2 +- 9 files changed, 32 insertions(+), 17 deletions(-) diff --git a/Bender.lock b/Bender.lock index 956f072c..a84dca89 100644 --- a/Bender.lock +++ b/Bender.lock @@ -7,8 +7,8 @@ packages: dependencies: - common_cells axi: - revision: f07498d53ecd5518b277c7d213ec3b71ca4df93c - version: 0.39.7 + revision: 78831b6feba265d5ee2683bbf42b4150f8a35c43 + version: 0.39.8 source: Git: https://github.com/pulp-platform/axi.git dependencies: @@ -72,7 +72,7 @@ packages: revision: 9afda9abb565971649c2aa0985639c096f351171 version: 1.38.0 source: - Git: https://github.com/pulp-platform/common_cells.git + Git: git@github.com:pulp-platform/common_cells.git dependencies: - common_verification - tech_cells_generic @@ -145,8 +145,8 @@ packages: dependencies: - tech_cells_generic hwpe-stream: - revision: b3d33afdd27e79bcda1348d0ab5f4afd52c03106 - version: 1.9.0 + revision: c1e25094bf9f2fee754611ef7491b7a1144e11b9 + version: 1.9.1 source: Git: https://github.com/pulp-platform/hwpe-stream.git dependencies: @@ -190,7 +190,7 @@ packages: dependencies: - common_cells neureka: - revision: 8e0883bd0c19c6844122456c749c2bdfde18011f + revision: 94528df2bc6d5eedc0439bd403c2ad005f0a7519 version: null source: Git: https://github.com/pulp-platform/neureka.git @@ -239,8 +239,8 @@ packages: - register_interface - tech_cells_generic register_interface: - revision: 5daa85d164cf6b54ad061ea1e4c6f3624556e467 - version: 0.4.5 + revision: 8e8c209ea559d3b54f45cf30fcce95ce70ff5e49 + version: 0.4.6 source: Git: https://github.com/pulp-platform/register_interface.git dependencies: @@ -279,7 +279,7 @@ packages: revision: 7968dd6e6180df2c644636bc6d2908a49f2190cf version: 0.2.13 source: - Git: https://github.com/pulp-platform/tech_cells_generic.git + Git: https://github.com/pulp-platform/tech_cells_generic dependencies: - common_verification timer_unit: diff --git 
a/Bender.yml b/Bender.yml index 647c891c..0ff29137 100644 --- a/Bender.yml +++ b/Bender.yml @@ -30,11 +30,11 @@ dependencies: cv32e40p: { git: "https://github.com/pulp-platform/cv32e40p.git", rev: astral-v1.0 } ibex: { git: "https://github.com/pulp-platform/ibex.git", rev: "pulpissimo-v6.1.2" } scm: { git: "https://github.com/pulp-platform/scm.git", rev: v1.2.0 } - hci: { git: "https://github.com/pulp-platform/hci.git", rev: 4257a73b391731d94077ad72fc528c27f5b11392 } # branch: smazzola/chimera + hci: { git: "https://github.com/pulp-platform/hci.git", rev: 5421524afca701e45a83d0f1ec47568c019e021a } # branch: main register_interface: { git: "https://github.com/pulp-platform/register_interface.git", version: 0.4.5 } redundancy_cells: { git: "https://github.com/pulp-platform/redundancy_cells.git", rev: 9e31f7c6c24877eaf58279903e7a162b16c9a721 } # branch: astral-v0 redmule: { git: "https://github.com/pulp-platform/redmule.git", rev: astral-v1.0 } - neureka: { git: "https://github.com/pulp-platform/neureka.git", rev: 8e0883bd0c19c6844122456c749c2bdfde18011f } + neureka: { git: "https://github.com/pulp-platform/neureka.git", rev: astral-v1.0 } softex: { git: "https://github.com/belanoa/softex.git" , rev: astral-v1.0 } obi: { git: "https://github.com/pulp-platform/obi.git", rev: v0.1.3 } diff --git a/Makefile b/Makefile index 8796af4a..9b9acda5 100644 --- a/Makefile +++ b/Makefile @@ -13,7 +13,8 @@ QUESTA ?= endif -BENDER ?= bender +# BENDER ?= bender +BENDER ?= /local/user/dkm/Projects/pulp_cluster/bender-cust/target/debug/bender PYTHON ?= python3 VSIM ?= $(QUESTA) vsim @@ -72,6 +73,8 @@ Bender.lock: $(BENDER) checkout touch Bender.lock +update: + $(BENDER) update ###### # SW # diff --git a/rtl/cluster_interconnect_wrap.sv b/rtl/cluster_interconnect_wrap.sv index 6a86226d..69dbaa53 100644 --- a/rtl/cluster_interconnect_wrap.sv +++ b/rtl/cluster_interconnect_wrap.sv @@ -75,6 +75,7 @@ module cluster_interconnect_wrap // if DMA uses HWPE ports, ID width must be increased 
correspondingly localparam N_HCI_DMA_PORTS = DMA_USE_HWPE_PORT ? 0 : NB_DMAS; localparam N_HCI_HWPE_PORTS = DMA_USE_HWPE_PORT ? NB_HWPE + NB_DMAS : NB_HWPE; + localparam int unsigned HCI_FILTER_WRITE_R_VALID[0:N_HCI_HWPE_PORTS-1] = {1, 0, 0}; //-******************************************************** @@ -143,7 +144,8 @@ module cluster_interconnect_wrap .N_MEM ( NB_TCDM_BANKS ), .IW ( TCDM_ID_WIDTH ), .TS_BIT ( TEST_SET_BIT ), - .EXPFIFO ( 2 ), + .EXPFIFO ( 2 ), + .FILTER_WRITE_R_VALID ( HCI_FILTER_WRITE_R_VALID ), //For an explanation of these macros refer to https://github.com/pulp-platform/hci/blob/v2.1.1/rtl/common/hci_helpers.svh .`HCI_SIZE_PARAM(cores) ( HCI_CORE_SIZE ), .`HCI_SIZE_PARAM(mems) ( HCI_MEM_SIZE ), diff --git a/rtl/cluster_peripherals.sv b/rtl/cluster_peripherals.sv index c6e4a937..460c3483 100644 --- a/rtl/cluster_peripherals.sv +++ b/rtl/cluster_peripherals.sv @@ -101,6 +101,7 @@ module cluster_peripherals input logic [NB_CORES-1:0][3:0] hwpe_events_i, output logic hwpe_en_o, output logic [$clog2(NB_HWPES)-1:0] hwpe_sel_o, + // output logic [((NB_HWPES>1)?$clog2(NB_HWPES):1)-1:0] hwpe_sel_o, output hci_package::hci_interconnect_ctrl_t hci_ctrl_o, // Control ports diff --git a/rtl/hwpe_subsystem.sv b/rtl/hwpe_subsystem.sv index 269cfeab..87cc3dab 100644 --- a/rtl/hwpe_subsystem.sv +++ b/rtl/hwpe_subsystem.sv @@ -23,6 +23,8 @@ module hwpe_subsystem parameter int unsigned N_CORES = 8, parameter int unsigned N_MASTER_PORT = 9, parameter int unsigned ID_WIDTH = 8, + //parameter int unsigned N_HWPES = 8, + //parameter int unsigned HWPE_SEL_BITS = (N_HWPES > 1) ? 
$clog2(N_HWPES) : 1, parameter hci_package::hci_size_parameter_t HCI_HWPE_SIZE = '0 ) ( @@ -30,6 +32,7 @@ module hwpe_subsystem input logic rst_n, input logic test_mode, input logic hwpe_en_i, + //input logic [HWPE_SEL_BITS-1:0] hwpe_sel_i, input logic [$clog2(MAX_NUM_HWPES)-1:0] hwpe_sel_i, hci_core_intf.initiator hwpe_xbar_master, @@ -44,6 +47,7 @@ module hwpe_subsystem localparam int unsigned EW = HCI_HWPE_SIZE.EW; localparam int unsigned EHW = HCI_HWPE_SIZE.EHW; + // TODO: remove this once we have a proper way to get the number of HWPEs localparam int unsigned N_HWPES = HWPE_CFG.NumHwpes; localparam int unsigned HWPE_SEL_BITS = (N_HWPES > 1) ? $clog2(N_HWPES) : 1; diff --git a/rtl/pulp_cluster.sv b/rtl/pulp_cluster.sv index b16a73d1..474e78b2 100644 --- a/rtl/pulp_cluster.sv +++ b/rtl/pulp_cluster.sv @@ -278,6 +278,8 @@ logic [Cfg.NumCores-1:0] dbg_core_running; logic [Cfg.NumCores-1:0] s_dbg_irq; logic s_hwpe_en; logic [$clog2(MAX_NUM_HWPES)-1:0] s_hwpe_sel; +// localparam int unsigned HWPE_SEL_BITS = (Cfg.HwpeCfg.NumHwpes > 1) ? 
$clog2(Cfg.HwpeCfg.NumHwpes) : 1; +// logic [HWPE_SEL_BITS-1:0] s_hwpe_sel; logic fetch_en_synch; logic en_sa_boot_synch; @@ -900,6 +902,7 @@ end cluster_peripherals #( .NB_CORES ( Cfg.NumCores ), .NB_HWPES ( MAX_NUM_HWPES ), + //.NB_HWPES ( Cfg.HwpeCfg.NumHwpes ), .NB_MPERIPHS ( Cfg.NumMstPeriphs ), .NB_CACHE_BANKS ( Cfg.iCacheNumBanks), .NB_SPERIPHS ( Cfg.NumSlvPeriphs ), @@ -1345,6 +1348,8 @@ generate .HWPE_CFG ( Cfg.HwpeCfg ), .N_CORES ( Cfg.NumCores ), .N_MASTER_PORT ( Cfg.HwpeNumPorts ), + //.N_HWPES ( Cfg.HwpeCfg.NumHwpes ), + //.HWPE_SEL_BITS ( HWPE_SEL_BITS ), .ID_WIDTH ( Cfg.NumCores + Cfg.NumMstPeriphs ), .HCI_HWPE_SIZE ( HciHwpeSizeParam ) ) hwpe_subsystem_i ( diff --git a/scripts/wave.tcl b/scripts/wave.tcl index 594813fd..8689538f 100644 --- a/scripts/wave.tcl +++ b/scripts/wave.tcl @@ -9,12 +9,12 @@ quietly WaveActivateNextPane {} 0 set CORE_TYPE [examine -radix symbolic sim:/pulp_cluster_tb/PulpClusterCfgVis.CoreType] set NUM_CORES [examine -radix dec sim:/pulp_cluster_tb/PulpClusterCfgVis.NumCores] -set HMR_PRESENT [examine -radix dec sim:/pulp_cluster_tb/PulpClusterCfgVis.HMRPresent] -set HWPE_PRESENT [examine -radix dec sim:/pulp_cluster_tb/PulpClusterCfgVis.HwpePresent] +set HMR_PRESENT [examine -radix hex sim:/pulp_cluster_tb/PulpClusterCfgVis.HMRPresent] +set HWPE_PRESENT [examine -radix hex sim:/pulp_cluster_tb/PulpClusterCfgVis.HwpePresent] set HWPE_LIST [examine sim:/pulp_cluster_tb/PulpClusterCfgVis.HwpeCfg.HwpeList] set NUM_HWPE [examine -radix dec sim:/pulp_cluster_tb/PulpClusterCfgVis.HwpeCfg.NumHwpes] -set ENABLE_WIDE [examine -radix dec sim:/pulp_cluster_tb/PulpClusterCfgVis.EnableWidePort] +set ENABLE_WIDE [examine -radix hex sim:/pulp_cluster_tb/PulpClusterCfgVis.EnableWidePort] if { $ENABLE_WIDE == 1 } { set CLUSTER_PATH /pulp_cluster_tb/gen_dma_buses/cluster_i } else { diff --git a/tb/pulp_cluster_tb.sv b/tb/pulp_cluster_tb.sv index 76618ddc..f64b2ffb 100644 --- a/tb/pulp_cluster_tb.sv +++ b/tb/pulp_cluster_tb.sv @@ -159,7 +159,7 
@@ module pulp_cluster_tb; TcdmSize: 128*1024, TcdmNumBank: 16, HwpePresent: 1, - HwpeCfg: '{NumHwpes: 1, HwpeList: {NEUREKA}}, + HwpeCfg: '{NumHwpes: 3, HwpeList: {SOFTEX, NEUREKA, REDMULE}}, HwpeNumPorts: 9, HMRPresent: 1, HMRDmrEnabled: 1, From da5ab6539f0ac53c870100243426ad51f5bc8119 Mon Sep 17 00:00:00 2001 From: Daniel Keller Date: Thu, 21 Aug 2025 09:18:42 +0200 Subject: [PATCH 55/67] fix: revert local Bender build Signed-off-by: Daniel Keller --- Makefile | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Makefile b/Makefile index 9b9acda5..f929bc9b 100644 --- a/Makefile +++ b/Makefile @@ -13,8 +13,8 @@ QUESTA ?= endif -# BENDER ?= bender -BENDER ?= /local/user/dkm/Projects/pulp_cluster/bender-cust/target/debug/bender +BENDER ?= bender +# BENDER ?= /local/user/dkm/Projects/pulp_cluster/bender-cust/target/debug/bender PYTHON ?= python3 VSIM ?= $(QUESTA) vsim From f0c26841300cd1bbb73559e2215b7355cb149872 Mon Sep 17 00:00:00 2001 From: Daniel Keller Date: Thu, 21 Aug 2025 17:22:55 +0200 Subject: [PATCH 56/67] add lock for dependencies Signed-off-by: Daniel Keller --- Makefile | 91 +++++++++++++++++++++++++++++++++++++++++++++++++------- 1 file changed, 81 insertions(+), 10 deletions(-) diff --git a/Makefile b/Makefile index f929bc9b..31f4cffa 100644 --- a/Makefile +++ b/Makefile @@ -88,28 +88,99 @@ sw-clean: ## Clone pulp-runtime as SW stack PULP_RUNTIME_REMOTE ?= https://github.com/pulp-platform/pulp-runtime.git -PULP_RUNTIME_COMMIT ?= 118d86c2fadc130ffb3b362f6e2875ca88acb30c # branch: dkeller/chimera-v2 +PULP_RUNTIME_REF ?= dkeller/chimera-v2 +PULP_RUNTIME_DIR ?= $(ROOT_DIR)/pulp-runtime +# Lock (optional) +PULP_RUNTIME_LOCK_FILE ?= pulp-runtime.lock +PULP_RUNTIME_COMMIT := $(shell test -f $(PULP_RUNTIME_LOCK_FILE) && cat $(PULP_RUNTIME_LOCK_FILE) || echo) pulp-runtime: - git clone $(PULP_RUNTIME_REMOTE) $@ - cd $@ && git checkout $(PULP_RUNTIME_COMMIT) + @if [ -d "$(PULP_RUNTIME_DIR)/.git" ]; then \ + echo "[pulp-runtime] Using existing 
repo at $(PULP_RUNTIME_DIR)"; \ + git -C $(PULP_RUNTIME_DIR) fetch --tags --all; \ + else \ + echo "[pulp-runtime] Cloning $(PULP_RUNTIME_REMOTE)"; \ + git clone $(PULP_RUNTIME_REMOTE) $(PULP_RUNTIME_DIR); \ + fi + @if [ -n "$(PULP_RUNTIME_COMMIT)" ]; then \ + echo "[pulp-runtime] Checking out locked commit $(PULP_RUNTIME_COMMIT)"; \ + git -C $(PULP_RUNTIME_DIR) checkout --detach $(PULP_RUNTIME_COMMIT); \ + else \ + echo "[pulp-runtime] Checking out ref $(PULP_RUNTIME_REF)"; \ + git -C $(PULP_RUNTIME_DIR) checkout $(PULP_RUNTIME_REF) || true; \ + git -C $(PULP_RUNTIME_DIR) pull --ff-only || true; \ + fi + +lock-runtime: + @git -C $(PULP_RUNTIME_DIR) rev-parse HEAD > $(PULP_RUNTIME_LOCK_FILE) && \ + echo "Locked pulp-runtime to $$(cat $(PULP_RUNTIME_LOCK_FILE))" + +unlock-runtime: + @rm -f $(PULP_RUNTIME_LOCK_FILE) && echo "Unlocked pulp-runtime (will track $(PULP_RUNTIME_REF))" ## Clone fault injection scripts FAULT_SIM_REMOTE ?= https://github.com/pulp-platform/InjectaFault.git -FAULT_SIM_COMMIT ?= 84ddcff # branch: rt/rename-var +FAULT_SIM_BRANCH ?= rt/rename-var +FAULT_SIM_DIR ?= $(ROOT_DIR)/fault_injection_sim +# Lock (optional) +FAULT_SIM_LOCK_FILE ?= fault_injection_sim.lock +FAULT_SIM_COMMIT := $(shell test -f $(FAULT_SIM_LOCK_FILE) && cat $(FAULT_SIM_LOCK_FILE) || echo) fault_injection_sim: - git clone $(FAULT_SIM_REMOTE) $@ - cd $@ && git checkout $(FAULT_SIM_COMMIT) + @if [ -d "$(FAULT_SIM_DIR)/.git" ]; then \ + echo "[fault_injection_sim] Using existing repo at $(FAULT_SIM_DIR)"; \ + git -C $(FAULT_SIM_DIR) fetch --tags --all; \ + else \ + echo "[fault_injection_sim] Cloning $(FAULT_SIM_REMOTE)"; \ + git clone $(FAULT_SIM_REMOTE) $(FAULT_SIM_DIR); \ + fi + @if [ -n "$(FAULT_SIM_COMMIT)" ]; then \ + echo "[fault_injection_sim] Checking out locked commit $(FAULT_SIM_COMMIT)"; \ + git -C $(FAULT_SIM_DIR) checkout --detach $(FAULT_SIM_COMMIT); \ + else \ + echo "[fault_injection_sim] Checking out ref $(FAULT_SIM_BRANCH)"; \ + git -C $(FAULT_SIM_DIR) checkout 
$(FAULT_SIM_BRANCH) || true; \ + git -C $(FAULT_SIM_DIR) pull --ff-only || true; \ + fi + +lock-fault-sim: + @git -C $(FAULT_SIM_DIR) rev-parse HEAD > $(FAULT_SIM_LOCK_FILE) && \ + echo "Locked fault_injection_sim to $$(cat $(FAULT_SIM_LOCK_FILE))" + +unlock-fault-sim: + @rm -f $(FAULT_SIM_LOCK_FILE) && echo "Unlocked fault_injection_sim (will track $(FAULT_SIM_BRANCH))" ## Clone regression tests REGRESSION_TESTS_REMOTE ?= https://github.com/pulp-platform/regression_tests.git -REGRESSION_TESTS_COMMIT ?= 968b2d3e2259a489347017ef99910257fa247970 # branch: dkeller/chimera-v2 +REGRESSION_TESTS_BRANCH ?= dkeller/chimera-v2 +REGRESSION_TESTS_DIR ?= $(ROOT_DIR)/regression_tests +# Lock (optional) +REGRESSION_TESTS_LOCK_FILE ?= regression_tests.lock +REGRESSION_TESTS_COMMIT := $(shell test -f $(REGRESSION_TESTS_LOCK_FILE) && cat $(REGRESSION_TESTS_LOCK_FILE) || echo) regression_tests: - git clone $(REGRESSION_TESTS_REMOTE) $@ - cd $@ && git checkout $(REGRESSION_TESTS_COMMIT) - cd $@ && git submodule update --init --recursive + @if [ -d "$(REGRESSION_TESTS_DIR)/.git" ]; then \ + echo "[regression_tests] Using existing repo at $(REGRESSION_TESTS_DIR)"; \ + git -C $(REGRESSION_TESTS_DIR) fetch --tags --all; \ + else \ + echo "[regression_tests] Cloning $(REGRESSION_TESTS_REMOTE)"; \ + git clone $(REGRESSION_TESTS_REMOTE) $(REGRESSION_TESTS_DIR); \ + fi + @if [ -n "$(REGRESSION_TESTS_COMMIT)" ]; then \ + echo "[regression_tests] Checking out locked commit $(REGRESSION_TESTS_COMMIT)"; \ + git -C $(REGRESSION_TESTS_DIR) checkout --detach $(REGRESSION_TESTS_COMMIT); \ + else \ + echo "[regression_tests] Checking out ref $(REGRESSION_TESTS_BRANCH)"; \ + git -C $(REGRESSION_TESTS_DIR) checkout $(REGRESSION_TESTS_BRANCH) || true; \ + git -C $(REGRESSION_TESTS_DIR) pull --ff-only || true; \ + fi + +lock-regression-tests: + @git -C $(REGRESSION_TESTS_DIR) rev-parse HEAD > $(REGRESSION_TESTS_LOCK_FILE) && \ + echo "Locked regression_tests to $$(cat $(REGRESSION_TESTS_LOCK_FILE))" + 
+unlock-regression-tests: + @rm -f $(REGRESSION_TESTS_LOCK_FILE) && echo "Unlocked regression_tests (will track $(REGRESSION_TESTS_BRANCH))" ######################### # Hardware dependencies # From 0b687a35ec1eabdf7d39fcdca86b389c28a893d8 Mon Sep 17 00:00:00 2001 From: Daniel Keller Date: Thu, 21 Aug 2025 17:52:16 +0200 Subject: [PATCH 57/67] trigger job Signed-off-by: Daniel Keller --- rtl/pulp_cluster.sv | 1 + 1 file changed, 1 insertion(+) diff --git a/rtl/pulp_cluster.sv b/rtl/pulp_cluster.sv index 474e78b2..0826157d 100644 --- a/rtl/pulp_cluster.sv +++ b/rtl/pulp_cluster.sv @@ -1211,6 +1211,7 @@ always_comb begin end end + for (int i = 0; i < Cfg.NumCores/2; i++) begin if (1'b1) begin // InterleaveGrps hmr_dmr_sw_resynch_req[i] = hmr_dmr_sw_resynch_req_short[i]; From af8bce7f2ab3b575b92ed75a5320942e87708d44 Mon Sep 17 00:00:00 2001 From: Daniel Keller Date: Thu, 21 Aug 2025 18:20:32 +0200 Subject: [PATCH 58/67] trigger job; fix some lint problems Signed-off-by: Daniel Keller --- rtl/pulp_cluster.sv | 27 +++++++++++++++------------ 1 file changed, 15 insertions(+), 12 deletions(-) diff --git a/rtl/pulp_cluster.sv b/rtl/pulp_cluster.sv index 0826157d..6f959e49 100644 --- a/rtl/pulp_cluster.sv +++ b/rtl/pulp_cluster.sv @@ -31,7 +31,8 @@ module pulp_cluster import rapid_recovery_pkg::*; import fpnew_pkg::*; #( - parameter pulp_cluster_package::pulp_cluster_cfg_t Cfg = pulp_cluster_package::PulpClusterDefaultCfg, + parameter pulp_cluster_package::pulp_cluster_cfg_t Cfg = + pulp_cluster_package::PulpClusterDefaultCfg, localparam int unsigned TcdmBankSize = Cfg.TcdmSize/Cfg.TcdmNumBank, localparam int unsigned TcdmNumRows = TcdmBankSize/4, localparam int unsigned MaxUniqId = 1, @@ -126,7 +127,9 @@ module pulp_cluster // TCDM banks data width extended with parity for ECCs localparam int unsigned ProtectedTcdmWidth = DataWidth + ParityWidth, // Number of parity bits for ECC-extended HCI HWPE branch - localparam int unsigned HWPEParityWidth = 
($clog2(DataWidth)+2)*Cfg.HwpeNumPorts + ($clog2(AddrWidth+(Cfg.HwpeNumPorts*DataWidth)/8+1)+2) + localparam int unsigned HWPEParityWidth = + ($clog2(DataWidth) + 2) * Cfg.HwpeNumPorts + + ($clog2(AddrWidth + (Cfg.HwpeNumPorts * DataWidth) / 8 + 1) + 2); )( input logic clk_i, input logic rst_ni, @@ -253,7 +256,8 @@ module pulp_cluster //Ensure that the input AXI ID width is big enough to accomodate the accomodate the IDs of internal wiring if (Cfg.AxiIdInWidth < 1 + $clog2(Cfg.iCacheNumBanks)) - $info("AXI input ID width must be larger than 1+$clog2(Cfg.iCacheNumBanks) which is %d but was %d", 1 + $clog2(Cfg.iCacheNumBanks), Cfg.AxiIdInWidth); + $info("AXI input ID width must be larger than 1+$clog2(Cfg.iCacheNumBanks) which is %d but was %d" + , 1 + $clog2(Cfg.iCacheNumBanks), Cfg.AxiIdInWidth); localparam int unsigned NB_L1_CUTS = 16; localparam int unsigned RW_MARGIN_WIDTH = 4; @@ -352,7 +356,7 @@ logic s_dma_fc_irq; // Wide AXI infrastructure: Conditional implementation based on EnableWidePort // - MCHAN: Always uses narrow transfers (EnableWidePort = 0) -// - iDMA with EnableWidePort=1: Uses wide transfers (256-bit AXI) +// - iDMA with EnableWidePort=1: Uses wide transfers (256-bit AXI) // - iDMA with EnableWidePort=0: Uses narrow transfers (64-bit AXI) // - Wide infrastructure present for interface compatibility // - Narrow DMA master merged with cluster bus master when wide disabled @@ -1211,7 +1215,6 @@ always_comb begin end end - for (int i = 0; i < Cfg.NumCores/2; i++) begin if (1'b1) begin // InterleaveGrps hmr_dmr_sw_resynch_req[i] = hmr_dmr_sw_resynch_req_short[i]; @@ -1285,13 +1288,13 @@ generate assign setback = '0; for (genvar i = 0; i < Cfg.NumCores; i++) begin - assign hmr2core[i].clock_en = sys2hmr[i].clock_en; - assign hmr2core[i].boot_addr = sys2hmr[i].boot_addr; - assign hmr2core[i].core_id = sys2hmr[i].core_id; - assign hmr2core[i].cluster_id = sys2hmr[i].cluster_id; - assign hmr2core[i].instr_gnt = sys2hmr[i].instr_gnt; - assign 
hmr2core[i].instr_rvalid = sys2hmr[i].instr_rvalid; - assign hmr2core[i].instr_rdata = sys2hmr[i].instr_rdata; + assign hmr2core[i].clock_en = sys2hmr[i].clock_en; + assign hmr2core[i].boot_addr = sys2hmr[i].boot_addr; + assign hmr2core[i].core_id = sys2hmr[i].core_id; + assign hmr2core[i].cluster_id = sys2hmr[i].cluster_id; + assign hmr2core[i].instr_gnt = sys2hmr[i].instr_gnt; + assign hmr2core[i].instr_rvalid = sys2hmr[i].instr_rvalid; + assign hmr2core[i].instr_rdata = sys2hmr[i].instr_rdata; assign hmr2core[i].data_gnt = sys2hmr[i].data_gnt; assign hmr2core[i].data_rvalid = sys2hmr[i].data_rvalid; assign hmr2core[i].data_rdata = sys2hmr[i].data_rdata; From b396da7b0abcc8619d9cab24a0eb86c157f8db22 Mon Sep 17 00:00:00 2001 From: Daniel Keller Date: Thu, 21 Aug 2025 18:29:03 +0200 Subject: [PATCH 59/67] trigger job Signed-off-by: Daniel Keller --- rtl/pulp_cluster.sv | 1 + 1 file changed, 1 insertion(+) diff --git a/rtl/pulp_cluster.sv b/rtl/pulp_cluster.sv index 6f959e49..24c04f27 100644 --- a/rtl/pulp_cluster.sv +++ b/rtl/pulp_cluster.sv @@ -543,6 +543,7 @@ hci_core_intf #( // ***********************************************************************************************+ // ***********************************************************************************************+ + //*************************************************** /* synchronous AXI interfaces internal to the cluster */ //*************************************************** From 3d07b5fb2b021b03d3ee4a474b3b92b88ea123af Mon Sep 17 00:00:00 2001 From: Daniel Keller Date: Thu, 21 Aug 2025 18:42:27 +0200 Subject: [PATCH 60/67] trigger job Signed-off-by: Daniel Keller --- rtl/pulp_cluster.sv | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/rtl/pulp_cluster.sv b/rtl/pulp_cluster.sv index 24c04f27..373b088e 100644 --- a/rtl/pulp_cluster.sv +++ b/rtl/pulp_cluster.sv @@ -129,7 +129,7 @@ module pulp_cluster // Number of parity bits for ECC-extended HCI HWPE branch localparam int unsigned 
HWPEParityWidth = ($clog2(DataWidth) + 2) * Cfg.HwpeNumPorts + - ($clog2(AddrWidth + (Cfg.HwpeNumPorts * DataWidth) / 8 + 1) + 2); + ($clog2(AddrWidth + (Cfg.HwpeNumPorts * DataWidth) / 8 + 1) + 2) )( input logic clk_i, input logic rst_ni, @@ -1204,6 +1204,7 @@ end logic [Cfg.NumCores/3-1:0] hmr_tmr_sw_resynch_req_short; logic [Cfg.NumCores/2-1:0] hmr_dmr_sw_resynch_req_short; + always_comb begin hmr_tmr_sw_resynch_req = '0; hmr_dmr_sw_resynch_req = '0; From 6954b7286cd41f814eca493b5760b9b7237e33b0 Mon Sep 17 00:00:00 2001 From: Daniel Keller Date: Thu, 21 Aug 2025 23:13:00 +0200 Subject: [PATCH 61/67] fix: wrong Bender lock Signed-off-by: Daniel Keller --- Bender.lock | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Bender.lock b/Bender.lock index a84dca89..9108f400 100644 --- a/Bender.lock +++ b/Bender.lock @@ -69,10 +69,10 @@ packages: dependencies: - hci common_cells: - revision: 9afda9abb565971649c2aa0985639c096f351171 + revision: b20ad7aba2080b3bb705680e0f70f1fc180ce290 version: 1.38.0 source: - Git: git@github.com:pulp-platform/common_cells.git + Git: https://github.com/pulp-platform/common_cells.git dependencies: - common_verification - tech_cells_generic From 46b03d21e010733f1d9efe83810e196890ff705d Mon Sep 17 00:00:00 2001 From: Daniel Keller Date: Thu, 21 Aug 2025 23:17:24 +0200 Subject: [PATCH 62/67] fix: wrong Bender lock Signed-off-by: Daniel Keller --- Bender.lock | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/Bender.lock b/Bender.lock index 9108f400..1fd43b66 100644 --- a/Bender.lock +++ b/Bender.lock @@ -198,6 +198,7 @@ packages: - hci - hwpe-ctrl - hwpe-stream + - register_interface - zeroriscy obi: revision: c2141a653c755461ff44f61d12aeb5d99fc8e760 @@ -279,7 +280,7 @@ packages: revision: 7968dd6e6180df2c644636bc6d2908a49f2190cf version: 0.2.13 source: - Git: https://github.com/pulp-platform/tech_cells_generic + Git: https://github.com/pulp-platform/tech_cells_generic.git dependencies: - 
common_verification timer_unit: From 101c4285bfd859031e0448e47cfb96d52a53327a Mon Sep 17 00:00:00 2001 From: Daniel Keller Date: Fri, 22 Aug 2025 09:47:53 +0200 Subject: [PATCH 63/67] fix: initialise submodules. Fixes issue where pulp-nnx was not initialized Signed-off-by: Daniel Keller --- Makefile | 2 ++ 1 file changed, 2 insertions(+) diff --git a/Makefile b/Makefile index 31f4cffa..a2a2f030 100644 --- a/Makefile +++ b/Makefile @@ -174,6 +174,8 @@ regression_tests: git -C $(REGRESSION_TESTS_DIR) checkout $(REGRESSION_TESTS_BRANCH) || true; \ git -C $(REGRESSION_TESTS_DIR) pull --ff-only || true; \ fi + @echo "[regression_tests] Initializing submodules (e.g., pulp-nnx)" + @git -C $(REGRESSION_TESTS_DIR) submodule update --init --recursive lock-regression-tests: @git -C $(REGRESSION_TESTS_DIR) rev-parse HEAD > $(REGRESSION_TESTS_LOCK_FILE) && \ From b771d4cf6ae31a2cd3e2cb919c52981cf4bfb326 Mon Sep 17 00:00:00 2001 From: Daniel Keller Date: Fri, 22 Aug 2025 16:59:33 +0200 Subject: [PATCH 64/67] update neureka version to main; remove fifo delay Signed-off-by: Daniel Keller --- Bender.lock | 2 +- rtl/cluster_interconnect_wrap.sv | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/Bender.lock b/Bender.lock index 1fd43b66..c7a5d6fd 100644 --- a/Bender.lock +++ b/Bender.lock @@ -190,7 +190,7 @@ packages: dependencies: - common_cells neureka: - revision: 94528df2bc6d5eedc0439bd403c2ad005f0a7519 + revision: ff7090eb3738a5192af0f4e1499e4ed44a3041e5 version: null source: Git: https://github.com/pulp-platform/neureka.git diff --git a/rtl/cluster_interconnect_wrap.sv b/rtl/cluster_interconnect_wrap.sv index 69dbaa53..1e3a5751 100644 --- a/rtl/cluster_interconnect_wrap.sv +++ b/rtl/cluster_interconnect_wrap.sv @@ -75,7 +75,7 @@ module cluster_interconnect_wrap // if DMA uses HWPE ports, ID width must be increased correspondingly localparam N_HCI_DMA_PORTS = DMA_USE_HWPE_PORT ? 0 : NB_DMAS; localparam N_HCI_HWPE_PORTS = DMA_USE_HWPE_PORT ? 
NB_HWPE + NB_DMAS : NB_HWPE; - localparam int unsigned HCI_FILTER_WRITE_R_VALID[0:N_HCI_HWPE_PORTS-1] = {1, 0, 0}; + localparam int unsigned HCI_FILTER_WRITE_R_VALID[0:NB_HWPE-1] = '{default: 1}; //-******************************************************** @@ -178,7 +178,7 @@ module cluster_interconnect_wrap .N_MEM ( NB_TCDM_BANKS ), .IW ( TCDM_ID_WIDTH ), .TS_BIT ( TEST_SET_BIT ), - .EXPFIFO ( 2 ), + .EXPFIFO ( 0 ), .`HCI_SIZE_PARAM(cores) ( HCI_CORE_SIZE ), .`HCI_SIZE_PARAM(mems) ( HCI_MEM_SIZE ), .`HCI_SIZE_PARAM(hwpe) ( HCI_HWPE_SIZE ) From 3a5143d2e79968adf8aa2d669699199cfe840ef8 Mon Sep 17 00:00:00 2001 From: Daniel Keller Date: Sat, 23 Aug 2025 01:35:05 +0200 Subject: [PATCH 65/67] fix: update HCI interface to propagate Fifo Depth parameter. Fixes combinational loop when iDMA reads and writes to and from TCDM with 2 backends; bump lock versions Signed-off-by: Daniel Keller --- Bender.lock | 4 ++-- rtl/cluster_interconnect_wrap.sv | 8 ++++---- rtl/pulp_cluster.sv | 28 ++++++++++++++++++---------- 3 files changed, 24 insertions(+), 16 deletions(-) diff --git a/Bender.lock b/Bender.lock index c7a5d6fd..d8058c5e 100644 --- a/Bender.lock +++ b/Bender.lock @@ -114,7 +114,7 @@ packages: dependencies: - common_cells hci: - revision: 4257a73b391731d94077ad72fc528c27f5b11392 + revision: 0faa4f3cdfc87ead4a10e5c307c00aa6c4504d0d version: null source: Git: https://github.com/pulp-platform/hci.git @@ -190,7 +190,7 @@ packages: dependencies: - common_cells neureka: - revision: ff7090eb3738a5192af0f4e1499e4ed44a3041e5 + revision: 5ff2b6bc0a04de07eb2549a599655fb6d7f99c58 version: null source: Git: https://github.com/pulp-platform/neureka.git diff --git a/rtl/cluster_interconnect_wrap.sv b/rtl/cluster_interconnect_wrap.sv index 1e3a5751..364bb9ae 100644 --- a/rtl/cluster_interconnect_wrap.sv +++ b/rtl/cluster_interconnect_wrap.sv @@ -92,7 +92,8 @@ module cluster_interconnect_wrap .UW(HCI_HWPE_SIZE.UW), .IW(HCI_HWPE_SIZE.IW), .EW(HCI_HWPE_SIZE.EW), - .EHW(HCI_HWPE_SIZE.EHW) + 
.EHW(HCI_HWPE_SIZE.EHW), + .FD(HCI_HWPE_SIZE.FD) ) s_hwpe_intc [0:N_HCI_HWPE_PORTS-1] ( .clk(clk_i) @@ -105,7 +106,8 @@ module cluster_interconnect_wrap .UW(HCI_HWPE_SIZE.UW), .IW(HCI_HWPE_SIZE.IW), .EW(HCI_HWPE_SIZE.EW), - .EHW(HCI_CORE_SIZE.EHW) + .EHW(HCI_CORE_SIZE.EHW), + .FD(HCI_DMA_SIZE.FD) ) s_dma_intc [0:N_HCI_DMA_PORTS-1] ( .clk(clk_i) @@ -144,7 +146,6 @@ module cluster_interconnect_wrap .N_MEM ( NB_TCDM_BANKS ), .IW ( TCDM_ID_WIDTH ), .TS_BIT ( TEST_SET_BIT ), - .EXPFIFO ( 2 ), .FILTER_WRITE_R_VALID ( HCI_FILTER_WRITE_R_VALID ), //For an explanation of these macros refer to https://github.com/pulp-platform/hci/blob/v2.1.1/rtl/common/hci_helpers.svh .`HCI_SIZE_PARAM(cores) ( HCI_CORE_SIZE ), @@ -178,7 +179,6 @@ module cluster_interconnect_wrap .N_MEM ( NB_TCDM_BANKS ), .IW ( TCDM_ID_WIDTH ), .TS_BIT ( TEST_SET_BIT ), - .EXPFIFO ( 0 ), .`HCI_SIZE_PARAM(cores) ( HCI_CORE_SIZE ), .`HCI_SIZE_PARAM(mems) ( HCI_MEM_SIZE ), .`HCI_SIZE_PARAM(hwpe) ( HCI_HWPE_SIZE ) diff --git a/rtl/pulp_cluster.sv b/rtl/pulp_cluster.sv index 373b088e..7812ef81 100644 --- a/rtl/pulp_cluster.sv +++ b/rtl/pulp_cluster.sv @@ -383,7 +383,8 @@ localparam hci_package::hci_size_parameter_t HciCoreSizeParam = '{ UW: DEFAULT_UW, IW: DEFAULT_IW, EW: DEFAULT_EW, - EHW: DEFAULT_EHW + EHW: DEFAULT_EHW, + FD: 0 }; localparam hci_package::hci_size_parameter_t HciHwpeSizeParam = '{ DW: Cfg.HwpeNumPorts * DataWidth, @@ -392,7 +393,8 @@ localparam hci_package::hci_size_parameter_t HciHwpeSizeParam = '{ UW: DEFAULT_UW, IW: DEFAULT_IW, EW: (Cfg.ECCInterco) ? 
HWPEParityWidth : DEFAULT_EW, - EHW: DEFAULT_EHW + EHW: DEFAULT_EHW, + FD: 2 }; localparam hci_package::hci_size_parameter_t HciDmaSizeParam = '{ DW: DMA_HCI_DATA_WIDTH, @@ -401,14 +403,16 @@ localparam hci_package::hci_size_parameter_t HciDmaSizeParam = '{ UW: DEFAULT_UW, IW: DEFAULT_IW, EW: DEFAULT_EW, - EHW: DEFAULT_EHW + EHW: DEFAULT_EHW, + FD: 0 }; /* logarithmic and peripheral interconnect interfaces */ // ext -> log interconnect hci_core_intf #( .DW ( HciCoreSizeParam.DW ), - .AW ( HciCoreSizeParam.AW ) + .AW ( HciCoreSizeParam.AW ), + .FD ( HciCoreSizeParam.FD ) ) s_hci_ext[0:`NB_EXT-1] ( .clk ( clk_i ) ); @@ -422,7 +426,8 @@ XBAR_PERIPH_BUS s_hwpe_cfg_bus(); // DMA -> (optionally) size converter hci_core_intf #( .DW ( HciDmaSizeParam.DW ), - .AW ( HciDmaSizeParam.AW ) + .AW ( HciDmaSizeParam.AW ), + .FD ( HciDmaSizeParam.FD ) ) s_hci_dma[0:Cfg.DmaNumPlugs-1] ( .clk ( clk_i ) ); @@ -438,13 +443,15 @@ hci_core_intf #( .DW ( HciHwpeSizeParam.DW ), .AW ( HciHwpeSizeParam.AW ), .EW ( HciHwpeSizeParam.EW ), - .EHW ( HciHwpeSizeParam.EHW ) + .EHW ( HciHwpeSizeParam.EHW ), + .FD ( HciHwpeSizeParam.FD ) ) s_hci_hwpe [0:0] ( .clk ( clk_i ) ); hci_core_intf #( .DW ( HciCoreSizeParam.DW ), - .AW ( HciCoreSizeParam.AW ) + .AW ( HciCoreSizeParam.AW ), + .FD ( HciCoreSizeParam.FD ) ) s_hci_core [0:Cfg.NumCores-1] ( .clk ( clk_i ) ); @@ -472,7 +479,8 @@ XBAR_TCDM_BUS s_debug_bus[Cfg.NumCores-1:0](); // FIXME: iDMA hci_core_intf #( .DW ( HciCoreSizeParam.DW ), - .AW ( HciCoreSizeParam.AW ) + .AW ( HciCoreSizeParam.AW ), + .FD ( HciCoreSizeParam.FD ) ) s_core_dmactrl_bus [0:Cfg.NumCores-1] ( .clk ( clk_i ) ); @@ -518,7 +526,8 @@ localparam hci_package::hci_size_parameter_t HciMemSizeParam = '{ UW: DEFAULT_UW, IW: TCDM_ID_WIDTH, EW: (Cfg.ECCInterco) ? 
ParityWidth+MetaParityWidth : DEFAULT_EW, - EHW: DEFAULT_EHW + EHW: DEFAULT_EHW, + FD: 0 }; // log interconnect -> TCDM memory banks (SRAM) @@ -787,7 +796,6 @@ cluster_interconnect_wrap #( .HCI_HWPE_SIZE ( HciHwpeSizeParam ), .HCI_DMA_SIZE ( HciDmaSizeParam ), .HCI_MEM_SIZE ( HciMemSizeParam ) - ) cluster_interconnect_wrap_i ( .clk_i ( clk_i ), .rst_ni ( rst_ni ), From 584f3df8f4219048028193801a26ac7d6e0db148 Mon Sep 17 00:00:00 2001 From: Daniel Keller Date: Sat, 23 Aug 2025 01:55:36 +0200 Subject: [PATCH 66/67] trigger job, green CI Signed-off-by: Daniel Keller --- tb/pulp_cluster_tb.sv | 1 + 1 file changed, 1 insertion(+) diff --git a/tb/pulp_cluster_tb.sv b/tb/pulp_cluster_tb.sv index f64b2ffb..e2bb639b 100644 --- a/tb/pulp_cluster_tb.sv +++ b/tb/pulp_cluster_tb.sv @@ -713,6 +713,7 @@ module pulp_cluster_tb; if ( $value$plusargs ("APP=%s", binary)); $display("[TB] Testing %s", binary); + load_binary(binary); foreach (sections[addr]) begin From 1bb8bbc7ecee7addeb746efd724d080b4bf8111f Mon Sep 17 00:00:00 2001 From: Daniel Keller Date: Sat, 23 Aug 2025 18:13:54 +0200 Subject: [PATCH 67/67] Revert "Merge branch 'smazzola/chimera' into dkeller/chimera-v2" This reverts commit 22646aef0ce40bed4100d0617890bd7d8a33f8dd, reversing changes made to 584f3df8f4219048028193801a26ac7d6e0db148. 
--- .gitignore | 1 + Bender.lock | 17 ++++++------- Bender.yml | 5 ++-- Makefile | 30 ++--------------------- bender-common.mk | 1 - rtl/cluster_interconnect_wrap.sv | 1 - rtl/idma_wrap.sv | 4 ++-- rtl/pulp_cluster.sv | 19 ++++++++++----- tb/pulp_cluster_tb.sv | 41 +------------------------------- 9 files changed, 31 insertions(+), 88 deletions(-) diff --git a/.gitignore b/.gitignore index bb15cae9..a2dc6bf9 100644 --- a/.gitignore +++ b/.gitignore @@ -7,3 +7,4 @@ regression_tests/ pulp-runtime/ fault_injection_sim/ venv/ +.venv/ diff --git a/Bender.lock b/Bender.lock index 7a700583..d8058c5e 100644 --- a/Bender.lock +++ b/Bender.lock @@ -98,14 +98,6 @@ packages: Git: https://github.com/pulp-platform/event_unit_flex.git dependencies: - common_cells - flex-v: - revision: e9355c2f6ec4c105abdff39489e5d1be93bc4374 - version: null - source: - Git: https://github.com/pulp-platform/flex-v.git - dependencies: - - fpnew - - tech_cells_generic fpnew: revision: a8e0cba6dd50f357ece73c2c955d96efc3c6c315 version: null @@ -206,6 +198,7 @@ packages: - hci - hwpe-ctrl - hwpe-stream + - register_interface - zeroriscy obi: revision: c2141a653c755461ff44f61d12aeb5d99fc8e760 @@ -256,6 +249,14 @@ packages: - axi - common_cells - common_verification + riscv: + revision: c760db14dbd6cc3ec3b8ae8274df2eac7225bcac + version: null + source: + Git: https://github.com/AlSaqr-platform/riscv_nn.git + dependencies: + - fpnew + - tech_cells_generic scm: revision: 472f99affe44ff7b282b519c047a3cfeb35b16c6 version: 1.2.0 diff --git a/Bender.yml b/Bender.yml index 039b52f8..0ff29137 100644 --- a/Bender.yml +++ b/Bender.yml @@ -26,7 +26,7 @@ dependencies: timer_unit: { git: "https://github.com/pulp-platform/timer_unit.git", version: 1.0.2 } common_cells: { git: "https://github.com/pulp-platform/common_cells.git", version: 1.35.0 } tech_cells_generic: { git: "https://github.com/pulp-platform/tech_cells_generic.git", version: 0.2.3 } - flex-v: { git: "https://github.com/pulp-platform/flex-v.git", rev: 
astral-v1.1 } + riscv: { git: "https://github.com/AlSaqr-platform/riscv_nn.git", rev: astral-v1.0 } cv32e40p: { git: "https://github.com/pulp-platform/cv32e40p.git", rev: astral-v1.0 } ibex: { git: "https://github.com/pulp-platform/ibex.git", rev: "pulpissimo-v6.1.2" } scm: { git: "https://github.com/pulp-platform/scm.git", rev: v1.2.0 } @@ -34,7 +34,7 @@ dependencies: register_interface: { git: "https://github.com/pulp-platform/register_interface.git", version: 0.4.5 } redundancy_cells: { git: "https://github.com/pulp-platform/redundancy_cells.git", rev: 9e31f7c6c24877eaf58279903e7a162b16c9a721 } # branch: astral-v0 redmule: { git: "https://github.com/pulp-platform/redmule.git", rev: astral-v1.0 } - neureka: { git: "https://github.com/pulp-platform/neureka.git", rev: ff7090eb3738a5192af0f4e1499e4ed44a3041e5 } # branch: main + neureka: { git: "https://github.com/pulp-platform/neureka.git", rev: astral-v1.0 } softex: { git: "https://github.com/belanoa/softex.git" , rev: astral-v1.0 } obi: { git: "https://github.com/pulp-platform/obi.git", rev: v0.1.3 } @@ -96,3 +96,4 @@ sources: files: - nonfree/gf12/sourcecode/tc_sram.sv - nonfree/gf12/sourcecode/tc_clk.sv + diff --git a/Makefile b/Makefile index e1aa1d69..a2a2f030 100644 --- a/Makefile +++ b/Makefile @@ -14,6 +14,7 @@ endif BENDER ?= bender +# BENDER ?= /local/user/dkm/Projects/pulp_cluster/bender-cust/target/debug/bender PYTHON ?= python3 VSIM ?= $(QUESTA) vsim @@ -45,6 +46,7 @@ endef ###################### NONFREE_REMOTE ?= git@iis-git.ee.ethz.ch:pulp-restricted/pulp-cluster-nonfree.git +#NONFREE_COMMIT ?= 6f5b4b5aa85b6f3ac4bbe03439dd250ab4810d80 # branch: dkeller/chimera-v2 NONFREE_BRANCH ?= dkeller/chimera-v2 nonfree-init: @@ -208,34 +210,6 @@ gen_idma_hw: clean_idma_hw: make -C $(IDMA_ROOT) idma_clean_all -######################### -# Hardware dependencies # -######################### - -# Set dependency paths only if dependencies have already been cloned -# This avoids running `bender checkout` at every make 
command -ifeq ($(shell test -d $(ROOT_DIR)/.bender || echo 1),) -IDMA_ROOT := $(shell $(BENDER) path idma) -endif - -# Fall back to safe defaults if dependencies are not cloned yet -IDMA_ROOT ?= . - -# Python requirements (version and packages) coming from iDMA repository -gen_idma_hw: $(IDMA_ROOT)/.idma_generated -$(IDMA_ROOT)/.idma_generated: - @$(PYTHON) --version >/dev/null 2>&1 || { echo "ERROR: Python not found. Python 3.8 or higher is required."; exit 1; } && \ - $(PYTHON) -c "import sys; assert sys.version_info >= (3, 8)" || { echo "ERROR: Python version must be 3.8 or higher"; exit 1; } && \ - rm -rf venv && $(PYTHON) -m venv venv && \ - . venv/bin/activate && \ - pip install --upgrade pip && \ - pip install -r $(IDMA_ROOT)/requirements.txt && \ - make -C $(IDMA_ROOT) idma_hw_all && \ - touch $@ - -clean_idma_hw: - make -C $(IDMA_ROOT) idma_clean_all - ######################## # Build and simulation # ######################## diff --git a/bender-common.mk b/bender-common.mk index 97d97c0d..4297d044 100644 --- a/bender-common.mk +++ b/bender-common.mk @@ -11,7 +11,6 @@ common_defs += -D CLUSTER_ALIAS common_defs += -D USE_PULP_PARAMETERS common_targs += -t rtl - # DMA configuration DMA_TYPE ?= idma diff --git a/rtl/cluster_interconnect_wrap.sv b/rtl/cluster_interconnect_wrap.sv index d97b9c3c..364bb9ae 100644 --- a/rtl/cluster_interconnect_wrap.sv +++ b/rtl/cluster_interconnect_wrap.sv @@ -179,7 +179,6 @@ module cluster_interconnect_wrap .N_MEM ( NB_TCDM_BANKS ), .IW ( TCDM_ID_WIDTH ), .TS_BIT ( TEST_SET_BIT ), - .EXPFIFO ( 2 ), .`HCI_SIZE_PARAM(cores) ( HCI_CORE_SIZE ), .`HCI_SIZE_PARAM(mems) ( HCI_MEM_SIZE ), .`HCI_SIZE_PARAM(hwpe) ( HCI_HWPE_SIZE ) diff --git a/rtl/idma_wrap.sv b/rtl/idma_wrap.sv index 00a7d48c..7c38fab1 100644 --- a/rtl/idma_wrap.sv +++ b/rtl/idma_wrap.sv @@ -39,7 +39,7 @@ module dmac_wrap #( parameter int unsigned GLOBAL_QUEUE_DEPTH = 2, // mux read ports between tcdm-tcdm and tcdm-axi? 
parameter bit MUX_READ = 1'b0, - parameter bit TCDM_MEM2BANKS = 1'b0, + parameter bit TCDM_MEM2BANKS = 1'b0, // when using mem2banks (implies AXI_DATA_WIDTH==64): // 4 ports per stream if read ports muxed, otherwise 6 // when not using mem2banks: @@ -177,7 +177,7 @@ module dmac_wrap #( axi_req_t [ NumStreams-1:0] dma_req; axi_resp_t [ NumStreams-1:0] dma_rsp; - // interface to structs + // interface to structs for (genvar s = 0; s < NUM_BIDIR_STREAMS; s++) begin : gen_connect_interface assign ext_master_req_o[s] = soc_req[s]; assign soc_rsp[s] = ext_master_resp_i[s]; diff --git a/rtl/pulp_cluster.sv b/rtl/pulp_cluster.sv index 67e56f78..7812ef81 100644 --- a/rtl/pulp_cluster.sv +++ b/rtl/pulp_cluster.sv @@ -375,6 +375,7 @@ localparam DMA_IW_CONTRIB_FAC = Cfg.DmaUseHwpePort ? 0 : 1; // if using MCHAN, must be 32 localparam int unsigned DMA_HCI_DATA_WIDTH = Cfg.DmaUseHwpePort ? Cfg.AxiDataOutWideWidth : DataWidth; + localparam hci_package::hci_size_parameter_t HciCoreSizeParam = '{ DW: DataWidth, AW: AddrWidth, @@ -616,6 +617,7 @@ hci_core_intf #( c2s_out_int_req_t s_dma_narrow_master_req; // Narrow DMA master (64-bit) c2s_out_int_resp_t s_dma_narrow_master_resp; + // core per2axi -> ext c2s_in_int_req_t s_core_ext_bus_req; c2s_in_int_resp_t s_core_ext_bus_resp; @@ -906,6 +908,7 @@ end else begin : gen_narrow_port_idma ); end + //*************************************************** //**************CLUSTER PERIPHERALS****************** //*************************************************** @@ -2047,12 +2050,13 @@ if (Cfg.AxiDataInWidth != Cfg.AxiDataOutWidth) begin `AXI_ASSIGN_REQ_STRUCT(s_data_slave_32_req,dst_remap_req) `AXI_ASSIGN_RESP_STRUCT(dst_remap_resp,s_data_slave_32_resp) - axi_dw_converter #( - .AxiIdWidth ( AxiIdInWidth ), - .AxiAddrWidth ( Cfg.AxiAddrWidth ), - .AxiSlvPortDataWidth ( Cfg.AxiDataInWidth ), - .AxiMstPortDataWidth ( Cfg.AxiDataOutWidth ), - .AxiMaxReads ( 1 ), + axi_dw_converter_intf #( + .AXI_ID_WIDTH ( AxiIdInWidth ), + .AXI_ADDR_WIDTH ( 
Cfg.AxiAddrWidth ), + .AXI_SLV_PORT_DATA_WIDTH ( Cfg.AxiDataInWidth ), + .AXI_MST_PORT_DATA_WIDTH ( Cfg.AxiDataOutWidth ), + .AXI_USER_WIDTH ( Cfg.AxiUserWidth ), + .AXI_MAX_READS ( 1 ), .aw_chan_t ( s2c_in_int_aw_chan_t ), .mst_w_chan_t ( c2s_w_chan_t ), .slv_w_chan_t ( s2c_in_int_w_chan_t ), @@ -2128,6 +2132,9 @@ initial begin : p_assert assert(DMA_HCI_DATA_WIDTH == DataWidth) else $fatal(1, "When Cfg.DmaUseHwpePort is 0, DMA_HCI_DATA_WIDTH must be equal to DataWidth!"); end + // Note: iDMA now uses conditional data width and AXI path selection + // EnableWidePort=0: iDMA uses 64-bit narrow transfers via cluster bus AXI path + // EnableWidePort=1: iDMA uses 256-bit wide transfers via dedicated wide AXI path `endif end `endif diff --git a/tb/pulp_cluster_tb.sv b/tb/pulp_cluster_tb.sv index 413778c5..e2bb639b 100644 --- a/tb/pulp_cluster_tb.sv +++ b/tb/pulp_cluster_tb.sv @@ -494,14 +494,6 @@ module pulp_cluster_tb; .AXI_USER_WIDTH ( AxiUw ), .LOG_DEPTH ( 3 ) ) async_cluster_to_soc_axi_bus(); - - AXI_BUS_ASYNC_GRAY #( - .AXI_ADDR_WIDTH ( AxiAw ), - .AXI_DATA_WIDTH ( DmaAxiDw ), - .AXI_ID_WIDTH ( DmaAxiIw ), - .AXI_USER_WIDTH ( AxiUw ), - .LOG_DEPTH ( 3 ) - ) async_dma_axi_bus(); // Behavioural slaves axi_m_req_t axi_memreq; @@ -510,38 +502,6 @@ module pulp_cluster_tb; `AXI_ASSIGN_TO_REQ(axi_memreq, axi_master[1]) `AXI_ASSIGN_FROM_RESP(axi_master[1], axi_memrsp) - axi_dw_converter_intf #( - .AXI_ID_WIDTH ( AxiIw ), - .AXI_ADDR_WIDTH ( AxiAw ), - .AXI_SLV_PORT_DATA_WIDTH ( DmaAxiDw ), - .AXI_MST_PORT_DATA_WIDTH ( AxiDw ), - .AXI_USER_WIDTH ( AxiUw ), - .AXI_MAX_READS ( 3 ) - ) i_dma_dw_conv ( - .clk_i ( s_clk ), - .rst_ni ( s_rstn ), - .slv ( dma_slave_iw ), - .mst ( axi_slave[2] ) - ); - - axi_iw_converter_intf #( - .AXI_SLV_PORT_ID_WIDTH ( DmaAxiIw ), - .AXI_MST_PORT_ID_WIDTH ( AxiIw ), - .AXI_SLV_PORT_MAX_UNIQ_IDS ( 5 ), - .AXI_SLV_PORT_MAX_TXNS_PER_ID ( 5 ), - .AXI_SLV_PORT_MAX_TXNS ( 5 ), - .AXI_MST_PORT_MAX_UNIQ_IDS ( 5 ), - .AXI_MST_PORT_MAX_TXNS_PER_ID ( 5 ), 
- .AXI_ADDR_WIDTH ( AxiAw ), - .AXI_DATA_WIDTH ( DmaAxiDw ), - .AXI_USER_WIDTH ( AxiUw ) - ) i_dma_iw_conv ( - .clk_i ( s_clk ), - .rst_ni ( s_rstn ), - .slv ( dma_slave ), - .mst ( dma_slave_iw ) - ); - axi_sim_mem #( .AddrWidth ( AxiAw ), .DataWidth ( AxiDw ), @@ -845,4 +805,5 @@ module pulp_cluster_tb; end + endmodule : pulp_cluster_tb