diff --git a/Bender.local b/Bender.local new file mode 100644 index 00000000..f46dbcf8 --- /dev/null +++ b/Bender.local @@ -0,0 +1,21 @@ +overrides: + axi2mem: { git: "https://github.com/pulp-platform/axi2mem.git", version: =1.0.2 } + axi2per: { git: "https://github.com/pulp-platform/axi2per.git", version: =1.0.2 } + per2axi: { git: "https://github.com/pulp-platform/per2axi.git", version: =1.0.5 } + cluster_interconnect: { git: "https://github.com/pulp-platform/cluster_interconnect.git", version: =1.3.0 } + mchan: { git: "https://github.com/pulp-platform/mchan.git", version: =1.2.4 } + obi: { git: "https://github.com/pulp-platform/obi.git", version: =0.1.7 } + cluster_peripherals: { git: "https://github.com/pulp-platform/cluster_peripherals.git", version: =2.2.0 } + axi: { git: "https://github.com/pulp-platform/axi.git", version: =0.39.8 } + timer_unit: { git: "https://github.com/pulp-platform/timer_unit.git", version: =1.0.2 } + tech_cells_generic: { git: "https://github.com/pulp-platform/tech_cells_generic.git", version: =0.2.13 } + scm: { git: "https://github.com/pulp-platform/scm.git", version: =1.2.0 } + register_interface: { git: "https://github.com/pulp-platform/register_interface.git", version: =0.4.7 } + redmule: { git: "https://github.com/pulp-platform/redmule.git", version: =2.0.4 } + common_cells: { git: "https://github.com/FondazioneChipsIT/common_cells.git", rev: 286ffd49d9ed3409ceaef7c7fe98516e2611a1a9 } # gl/fix_define + hci: { git: "https://github.com/FondazioneChipsIT/hci.git", rev: 3e63936991569d0b1e315d6914478937b37bb338 } + cv32e40p: { git: "https://github.com/pulp-platform/cv32e40p.git", rev: astral-v1.0 } + redundancy_cells: { git: "https://github.com/pulp-platform/redundancy_cells.git", rev: 6ba6f41 } # yt/redmule-v2 + fpnew: { git: "https://github.com/pulp-platform/cvfpu.git", rev: pulp-v0.1.3 } + hwpe-stream: { git: "https://github.com/pulp-platform/hwpe-stream.git", version: =1.9.0 } + hwpe-ctrl: { git: 
"https://github.com/pulp-platform/hwpe-ctrl.git", rev: 3690a3c } \ No newline at end of file diff --git a/Bender.lock b/Bender.lock index 9a29e272..6e061c45 100644 --- a/Bender.lock +++ b/Bender.lock @@ -45,13 +45,14 @@ packages: dependencies: - common_cells cluster_icache: - revision: 64e21ae455bbdde850c4df13bef86ea55ac42537 - version: 0.2.0 + revision: 57ffb22066171d89974bdec240bad13f5efac9de + version: null source: - Git: https://github.com/pulp-platform/cluster_icache.git + Git: https://github.com/FondazioneChipsIT/cluster_icache.git dependencies: - axi - common_cells + - register_interface - scm - tech_cells_generic cluster_interconnect: @@ -62,17 +63,17 @@ packages: dependencies: - common_cells cluster_peripherals: - revision: e464eb9ddcc39e5a50009819601c4f213b1d4ba3 - version: 2.2.0 + revision: 630159c9d81c41a9638163de292fbfc0682bf0e5 + version: null source: - Git: https://github.com/pulp-platform/cluster_peripherals.git + Git: https://github.com/FondazioneChipsIT/cluster_peripherals.git dependencies: - hci common_cells: - revision: 9afda9abb565971649c2aa0985639c096f351171 - version: 1.38.0 + revision: 286ffd49d9ed3409ceaef7c7fe98516e2611a1a9 + version: null source: - Git: https://github.com/pulp-platform/common_cells.git + Git: https://github.com/FondazioneChipsIT/common_cells.git dependencies: - common_verification - tech_cells_generic @@ -105,15 +106,15 @@ packages: dependencies: - common_cells flex-v: - revision: e9355c2f6ec4c105abdff39489e5d1be93bc4374 + revision: f22937733fd058550ef847ddbf1381e821f0f9d2 version: null source: - Git: https://github.com/pulp-platform/flex-v.git + Git: https://github.com/FondazioneChipsIT/flex-v.git dependencies: - fpnew - tech_cells_generic fpnew: - revision: e5aa6a01b5bbe1675c3aa8872e1203413ded83d1 + revision: a8e0cba6dd50f357ece73c2c955d96efc3c6c315 version: null source: Git: https://github.com/pulp-platform/cvfpu.git @@ -128,10 +129,10 @@ packages: dependencies: - common_cells hci: - revision: 
2a5a5081a2b32f1a04e7e28c00d3762d92602b84 + revision: 3e63936991569d0b1e315d6914478937b37bb338 version: null source: - Git: https://github.com/pulp-platform/hci.git + Git: https://github.com/FondazioneChipsIT/hci.git dependencies: - cluster_interconnect - common_cells @@ -140,10 +141,10 @@ packages: - redundancy_cells - register_interface hier-icache: - revision: 7243834d2407ca23cff583d57641c84b982bd9bc - version: 1.3.0 + revision: 416c5cbc7b9b8724a40da6ac6d0b19a9c6b864bc + version: null source: - Git: https://github.com/pulp-platform/hier-icache.git + Git: https://github.com/FondazioneChipsIT/hier-icache.git dependencies: - axi - axi_slice @@ -179,10 +180,10 @@ packages: Git: https://github.com/pulp-platform/icache-intc.git dependencies: [] idma: - revision: c12caf59bb482fe44b27361f6924ad346b2d22fe - version: 0.6.3 + revision: 2e637216e0455d77706a50d0639b86891e2a83aa + version: null source: - Git: https://github.com/pulp-platform/iDMA.git + Git: https://github.com/FondazioneChipsIT/iDMA.git dependencies: - axi - axi_stream @@ -257,8 +258,8 @@ packages: - register_interface - tech_cells_generic register_interface: - revision: 5daa85d164cf6b54ad061ea1e4c6f3624556e467 - version: 0.4.5 + revision: d6e1d4cdaab7870f4faf3f88a1c788eaf5ac129d + version: 0.4.7 source: Git: https://github.com/pulp-platform/register_interface.git dependencies: @@ -274,10 +275,10 @@ packages: dependencies: - tech_cells_generic softex: - revision: 11dd29e85d40e29fea0481b471f1c0cc967df1a4 + revision: a622550d91dd55bf30bc359e10b2c70cae3042d6 version: null source: - Git: https://github.com/belanoa/softex.git + Git: https://github.com/FondazioneChipsIT/softex.git dependencies: - common_cells - fpnew @@ -293,8 +294,8 @@ packages: dependencies: - common_verification timer_unit: - revision: 4c69615c89db9397a9747d6f6d6a36727854f0bc - version: 1.0.3 + revision: 3f4ee3e5b3875a473242de5d0c3ebb5a0fe4b8db + version: 1.0.2 source: Git: https://github.com/pulp-platform/timer_unit.git dependencies: [] diff 
--git a/Bender.yml b/Bender.yml index d6aec12b..e14d2986 100644 --- a/Bender.yml +++ b/Bender.yml @@ -12,30 +12,31 @@ package: - "Michael Rogenmoser " dependencies: - axi2mem: { git: "https://github.com/pulp-platform/axi2mem.git", rev: v1.0.2 } - axi2per: { git: "https://github.com/pulp-platform/axi2per.git", rev: v1.0.2 } - per2axi: { git: "https://github.com/pulp-platform/per2axi.git", rev: v1.0.5 } - cluster_interconnect: { git: "https://github.com/pulp-platform/cluster_interconnect.git", rev: v1.3.0 } + axi2mem: { git: "https://github.com/pulp-platform/axi2mem.git", version: =1.0.2 } + axi2per: { git: "https://github.com/pulp-platform/axi2per.git", version: =1.0.2 } + per2axi: { git: "https://github.com/pulp-platform/per2axi.git", version: =1.0.5 } + cluster_interconnect: { git: "https://github.com/pulp-platform/cluster_interconnect.git", version: =1.3.0 } event_unit_flex: { git: "https://github.com/pulp-platform/event_unit_flex.git", rev: astral-v1.0 } - mchan: { git: "https://github.com/pulp-platform/mchan.git", rev: v1.2.4 } - idma: { git: "https://github.com/pulp-platform/iDMA.git", rev: v0.6.3 } - hier-icache: { git: "https://github.com/pulp-platform/hier-icache.git", rev: v1.3.0 } - cluster_icache: { git: "https://github.com/pulp-platform/cluster_icache.git", rev: v0.2.0 } - cluster_peripherals: { git: "https://github.com/pulp-platform/cluster_peripherals.git", rev: v2.2.0 } - axi: { git: "https://github.com/pulp-platform/axi.git", version: 0.39.3 } - timer_unit: { git: "https://github.com/pulp-platform/timer_unit.git", version: 1.0.2 } - common_cells: { git: "https://github.com/pulp-platform/common_cells.git", version: 1.35.0 } - tech_cells_generic: { git: "https://github.com/pulp-platform/tech_cells_generic.git", version: 0.2.3 } - flex-v: { git: "https://github.com/pulp-platform/flex-v.git", rev: astral-v1.1 } + mchan: { git: "https://github.com/pulp-platform/mchan.git", version: =1.2.4 } + idma: { git: "https://github.com/FondazioneChipsIT/iDMA.git", 
rev: ChipsIt-v0.0.0 } + obi: { git: "https://github.com/pulp-platform/obi.git", version: =0.1.7 } + hier-icache: { git: "https://github.com/FondazioneChipsIT/hier-icache.git", rev: 416c5cbc7b9b8724a40da6ac6d0b19a9c6b864bc } # gl/fix_define + cluster_icache: { git: "https://github.com/FondazioneChipsIT/cluster_icache.git", rev: 57ffb22066171d89974bdec240bad13f5efac9de } # gl/fix_define + cluster_peripherals: { git: "https://github.com/FondazioneChipsIT/cluster_peripherals.git", rev: "630159c9d81c41a9638163de292fbfc0682bf0e5" } # chips-it + axi: { git: "https://github.com/pulp-platform/axi.git", version: =0.39.8 } + timer_unit: { git: "https://github.com/pulp-platform/timer_unit.git", version: =1.0.2 } + common_cells: { git: "https://github.com/FondazioneChipsIT/common_cells.git", rev: 286ffd49d9ed3409ceaef7c7fe98516e2611a1a9 } # gl/fix_define + tech_cells_generic: { git: "https://github.com/pulp-platform/tech_cells_generic.git", version: =0.2.3 } + flex-v: { git: "https://github.com/FondazioneChipsIT/flex-v.git", rev: f22937733fd058550ef847ddbf1381e821f0f9d2 } # gl/fix_pkg_import cv32e40p: { git: "https://github.com/pulp-platform/cv32e40p.git", rev: astral-v1.0 } ibex: { git: "https://github.com/pulp-platform/ibex.git", rev: "pulpissimo-v6.1.2" } - scm: { git: "https://github.com/pulp-platform/scm.git", rev: v1.2.0 } - hci: { git: "https://github.com/pulp-platform/hci.git", rev: 2a5a508 } # yt/wen-filter - register_interface: { git: "https://github.com/pulp-platform/register_interface.git", version: 0.4.4 } + scm: { git: "https://github.com/pulp-platform/scm.git", version: =1.2.0 } + hci: { git: "https://github.com/FondazioneChipsIT/hci.git", rev: 3e63936991569d0b1e315d6914478937b37bb338 } + register_interface: { git: "https://github.com/pulp-platform/register_interface.git", version: =0.4.7 } redundancy_cells: { git: "https://github.com/pulp-platform/redundancy_cells.git", rev: 6ba6f41 } # yt/redmule-v2 redmule: { git: 
"https://github.com/pulp-platform/redmule.git", version: =2.0.4 } neureka: { git: "https://github.com/pulp-platform/neureka.git", rev: f4e131c } # branch: lg/astral-v1.1 - softex: { git: "https://github.com/belanoa/softex.git" , rev: astral-v1.0 } + softex: { git: "https://github.com/FondazioneChipsIT/softex.git", rev: a622550d91dd55bf30bc359e10b2c70cae3042d6 } # gl/fix_pkg_import export_include_dirs: - include @@ -56,7 +57,7 @@ sources: - target: mchan files: - rtl/mchan_wrap.sv - - target: not(mchan) + - target: idma files: - rtl/idma_wrap.sv - rtl/hwpe_subsystem.sv @@ -93,6 +94,6 @@ sources: - target: pulp_standalone_synth files: - - nonfree/gf12/sourcecode/tc_sram.sv - - nonfree/gf12/sourcecode/tc_clk.sv + - gf22/sourcecode/tc_sram.sv + - gf22/sourcecode/tc_clk.sv diff --git a/CHANGELOG.md b/CHANGELOG.md index dcb588ab..4543a057 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -4,6 +4,13 @@ All notable changes to this project will be documented in this file. The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). 
+## v0.0.0 +### Added +* iDMA: integrated iDMA wrap supporting up to 3D transfers with 2 physical channels and parametric burst length + +### Fixed +* Testbench: fixed the time unit to ns and changed the clock to 500 MHz + ## [Unreleased] ### Added - Added `CHANGELOG.md` diff --git a/Makefile b/Makefile index 3d9e3580..b8b4ca32 100644 --- a/Makefile +++ b/Makefile @@ -17,14 +17,22 @@ BENDER ?= bender VSIM ?= $(QUESTA) vsim VOPT ?= $(QUESTA) vopt VLIB ?= $(QUESTA) vlib -top_level ?= pulp_cluster_tb + +QSIM ?= $(QUESTA) qsim +QOPT ?= $(QUESTA) qopt +Q1VE ?= q1ve --qverify + +VENV := venv + +top_level ?= pulp_cluster library ?= work elf-bin ?= stimuli.riscv bwruntest = $(ROOT_DIR)/pulp-runtime/scripts/bwruntests.py REGRESSIONS := $(ROOT_DIR)/regression_tests -VLOG_ARGS += -suppress vlog-2583 -suppress vlog-13314 -suppress vlog-13233 -timescale \"1 ns / 1 ps\" \"+incdir+$(shell pwd)/include\" +VLOG_ARGS_LINT += -suppress vlog-2583 -suppress vlog-13314 -suppress vlog-13233 \"+incdir+$(shell pwd)/include\" +VLOG_ARGS += -suppress vlog-2583 -suppress vlog-13314 -suppress vlog-13233 -timescale \"1ns / 1ps\" \"+incdir+$(shell pwd)/include\" define generate_vsim echo 'set ROOT [file normalize [file dirname [info script]]/$3]' > $1 @@ -49,7 +57,7 @@ nonfree-init: .PHONY: init -init: checkout +init: checkout generate_idma_rtl .PHONY: checkout ## Checkout/update dependencies using Bender @@ -74,8 +82,8 @@ sw-clean: @rm -rf pulp-runtime fault_injection_sim regression_tests ## Clone pulp-runtime as SW stack -PULP_RUNTIME_REMOTE ?= https://github.com/pulp-platform/pulp-runtime.git -PULP_RUNTIME_COMMIT ?= f10670b # branch: lg/upstream +PULP_RUNTIME_REMOTE ?= https://github.com/FondazioneChipsIT/pulp-runtime.git +PULP_RUNTIME_COMMIT ?= 9bd23d2b2b6b6c74fc26a3ab3402fbebc589dace # branch: chips-it pulp-runtime: git clone $(PULP_RUNTIME_REMOTE) $@ @@ -90,8 +98,8 @@ fault_injection_sim: cd $@ && git checkout $(FAULT_SIM_COMMIT) ## Clone regression tests -REGRESSION_TESTS_REMOTE ?= 
https://github.com/pulp-platform/regression_tests.git -REGRESSION_TESTS_COMMIT ?= 7fa307d # branch: lg/upstream +REGRESSION_TESTS_REMOTE ?= https://github.com/FondazioneChipsIT/regression_tests.git +REGRESSION_TESTS_COMMIT ?= dd41d893ee371df7407ce2c0128d2d3a45c4ec71 # branch: chips-it regression_tests: git clone $(REGRESSION_TESTS_REMOTE) $@ @@ -111,27 +119,67 @@ sim-clean: include bender-common.mk include bender-sim.mk scripts/compile.tcl: | Bender.lock - $(call generate_vsim, $@, $(common_defs) $(common_targs) $(sim_defs) $(sim_targs),..) + $(call generate_vsim, $@, $(common_defs) $(common_targs) -t idma $(sim_defs) $(sim_targs),..) echo 'vlog "$(realpath $(ROOT_DIR))/tb/dpi/elfloader.cpp" -ccflags "-std=c++11"' >> $@ +scripts/compile.tcl-mchan: | Bender.lock + $(call generate_vsim, scripts/compile.tcl, $(common_defs) $(common_targs) -t mchan $(sim_defs) $(sim_targs),..) + echo 'vlog "$(realpath $(ROOT_DIR))/tb/dpi/elfloader.cpp" -ccflags "-std=c++11"' >> scripts/compile.tcl + include bender-synth.mk scripts/synth-compile.tcl: | Bender.lock $(BENDER) script synopsys $(common_targs) $(common_defs) $(synth_targs) $(synth_defs) > $@ +scripts/compile_lint.tcl: + echo 'set ROOT $(ROOT_DIR)' > $@ + $(BENDER) script vsim --vlog-arg="$(VLOG_ARGS_LINT)" $(common_defs) $(common_targs) | grep -v "set ROOT" >> $@ + echo >> $@ + $(library): $(QUESTA) vlib $(library) +$(VENV): + python3 -m venv $(VENV) && \ + $(VENV)/bin/python -m pip install -U pip && \ + $(VENV)/bin/python -m pip install -r $(shell $(BENDER) path idma)/requirements.txt + +generate_idma_rtl: $(VENV) + . "$(VENV)/bin/activate" && $(MAKE) -C $(shell $(BENDER) path idma) idma_hw_all + compile: $(library) @test -f Bender.lock || { echo "ERROR: Bender.lock file does not exist. Did you run make checkout in bender mode?"; exit 1; } @test -f scripts/compile.tcl || { echo "ERROR: scripts/compile.tcl file does not exist. 
Did you run make scripts in bender mode?"; exit 1; } $(VSIM) -c -do 'quit -code [source scripts/compile.tcl]' -build: compile - $(VOPT) $(compile_flag) -suppress 3053 -suppress 8885 -work $(library) $(top_level) -o $(top_level)_optimized +acc +build_qone: compile + $(QOPT) $(compile_flag) -debug +designfile -suppress 3053 -suppress 8885 -work $(library) $(top_level)_tb -o $(top_level)_tb_optimized + + +build: generate_idma_rtl compile + $(VOPT) $(compile_flag) -suppress 3053 -suppress 8885 -work $(library) $(top_level)_tb -o $(top_level)_tb_optimized +acc + +compile_lint: $(library) + @test -f Bender.lock || { echo "ERROR: Bender.lock file does not exist. Did you run make checkout in bender mode?"; exit 1; } + @test -f scripts/compile_lint.tcl || { echo "ERROR: scripts/compile_lint.tcl file does not exist. Did you run make scripts in bender mode?"; exit 1; } + $(Q1VE) -od lint/comp_lint_results -c -do " \ + onerror {exit}; \ + do scripts/compile_lint.tcl; \ + exit" + +lint: compile_lint + $(Q1VE) -od lint/lint_results -c -do " \ + lint methodology ip -goal release; \ + lint run -d $(top_level); \ + exit" + +cdc: compile_lint + $(Q1VE) -od cdc_results -c -do " \ + cdc run -d $(top_level); \ + exit" run: $(VSIM) +permissive -suppress 3053 -suppress 8885 -lib $(library) +MAX_CYCLES=$(max_cycles) +UVM_TESTNAME=$(test_case) +APP=$(elf-bin) +notimingchecks +nospecify -t 1ps \ - ${top_level}_optimized +permissive-off ++$(elf-bin) ++$(target-options) ++$(cl-bin) | tee sim.log + ${top_level}_tb_optimized +permissive-off ++$(elf-bin) ++$(target-options) ++$(cl-bin) | tee sim.log #################### # Regression tests # diff --git a/README.md b/README.md index 5799118f..b8d4a5d0 100644 --- a/README.md +++ b/README.md @@ -28,7 +28,7 @@ We use [Bender](https://github.com/pulp-platform/bender) for hardware IP and dep cargo install bender ``` -## Simulation +## QuestaSim Simulation It is possible to run benchmarks on the cluster, either within 
[PULP](https://github.com/pulp-platform/pulp) or as a separate IP. For @@ -68,3 +68,21 @@ Warning: requires QuestaSim 2022.3 or newer. ``` To use the GUI, add `gui=1` to the previous command. + +## QuestaOne Simulation + +To simulate with the new QuestaOne flow, follow the previous steps up to the `make pulp-runtime` command, then: + +1. Run the following from the pulp cluster root directory: + ``` + make build_qone + ``` + +2. Go to the desired test directory in regression_tests and run the following: + ``` + make clean all run_qone + ``` + or: + ``` + make clean all run_qone gui=1 + ``` diff --git a/bender-common.mk b/bender-common.mk index ca6c866d..75c48018 100644 --- a/bender-common.mk +++ b/bender-common.mk @@ -11,7 +11,6 @@ common_defs += -D CLUSTER_ALIAS common_defs += -D USE_PULP_PARAMETERS common_targs += -t rtl -common_targs += -t mchan common_targs += -t cluster_standalone common_targs += -t scm_use_fpga_scm common_targs += -t cv32e40p_use_ff_regfile \ No newline at end of file diff --git a/bender-synth.mk b/bender-synth.mk index ccefb638..ea5f9756 100644 --- a/bender-synth.mk +++ b/bender-synth.mk @@ -4,4 +4,6 @@ synth_defs := +# Don't add the following for linting synth_targs := -t pulp_standalone_synth +synth_targs += -t idma diff --git a/packages/pulp_cluster_package.sv b/packages/pulp_cluster_package.sv index 2f8dae70..33467fa9 100644 --- a/packages/pulp_cluster_package.sv +++ b/packages/pulp_cluster_package.sv @@ -208,7 +208,7 @@ package pulp_cluster_package; NumCores: NumCores, DmaNumPlugs: NumDmas, DmaNumOutstandingBursts: 8, - DmaBurstLength: 256, + DmaBurstLength: 5, // Means bursts of 256 bytes: passed as 256 to mchan top, later computed inside iDMA NumMstPeriphs: NB_MPERIPHS, NumSlvPeriphs: NB_SPERIPHS, ClusterAlias: 1, @@ -227,9 +227,9 @@ package pulp_cluster_package; HMRTmrFIxed: 0, HMRInterleaveGrps: 1, HMREnableRapidRecovery: 1, - HMRSeparateDataVoters:1, - HMRSeparateAxiBus:0, - HMRNumBusVoters:1, + HMRSeparateDataVoters: 1, + 
HMRSeparateAxiBus: 0, + HMRNumBusVoters: 1, EnableECC: 1, ECCInterco: 1, iCacheNumBanks: 2, @@ -240,19 +240,16 @@ package pulp_cluster_package; iCachePrivateDataWidth: 32, EnableReducedTag: 1, L2Size: 1000*1024, - DmBaseAddr: 'h1A110000, - BootRomBaseAddr: 'h1A000000, - BootAddr: 'h1C000000, + DmBaseAddr: 'h60203000, + BootRomBaseAddr: 'h1C008080, + BootAddr: 'h1C008080, EnablePrivateFpu: 1, EnablePrivateFpDivSqrt: 0, - EnableSharedFpu: 0, - EnableSharedFpDivSqrt: 0, - NumSharedFpu: 0, NumAxiIn: NumAxiSubordinatePorts, NumAxiOut: NumAxiManagerPorts, AxiIdInWidth: AxiSubordinateIdwidth, AxiIdOutWidth:AxiManagerIdwidth, - AxiAddrWidth: 48, + AxiAddrWidth: 32, AxiDataInWidth: 64, AxiDataOutWidth: 64, AxiUserWidth: 10, diff --git a/rtl/cluster_bus_wrap.sv b/rtl/cluster_bus_wrap.sv index 775e5622..51f64712 100644 --- a/rtl/cluster_bus_wrap.sv +++ b/rtl/cluster_bus_wrap.sv @@ -107,7 +107,7 @@ module cluster_bus_wrap `AXI_ASSIGN_RESP_STRUCT(axi_master_resps[1], periph_master_resp_i) `AXI_ASSIGN_REQ_STRUCT(ext_master_req_o, axi_master_reqs[2]) `AXI_ASSIGN_RESP_STRUCT(axi_master_resps[2], ext_master_resp_i) - + // Address Map Rule typedef struct packed { logic [AXI_ADDR_WIDTH-1:0] idx ; @@ -131,28 +131,28 @@ module cluster_bus_wrap start_addr: cluster_base_addr + ClusterPeripheralsOffs, end_addr: cluster_base_addr + ClusterExternalOffs }; - assign addr_map[2] = '{ // everything above cluster to ext_slave + assign addr_map[2] = '{ // everything above cluster to ext_slave idx: 2, start_addr: cluster_base_addr + ClusterExternalOffs, - end_addr: 32'hFFFF_FFFF + end_addr: 'hFFFF_FFFF }; - assign addr_map[3] = '{ // everything below cluster + assign addr_map[3] = '{ // everything below cluster idx: 2, start_addr: 'h0, end_addr: cluster_base_addr }; - + localparam int unsigned MAX_TXNS_PER_SLV_PORT = (DMA_NB_OUTSND_BURSTS > NB_CORES) ? 
- DMA_NB_OUTSND_BURSTS : NB_CORES; + DMA_NB_OUTSND_BURSTS : NB_CORES; localparam xbar_cfg_t AXI_XBAR_CFG = '{ NoSlvPorts: NB_SLAVE, NoMstPorts: NB_MASTER, MaxMstTrans: MAX_TXNS_PER_SLV_PORT, //The TCDM ports do not support - //outstanding transactiions anyways + //outstanding transactions anyways MaxSlvTrans: DMA_NB_OUTSND_BURSTS + NB_CORES, //Allow up to 4 in-flight transactions //per slave port - FallThrough: 1'b0, //Use the reccomended default config + FallThrough: 1'b0, //Use the recommended default config LatencyMode: axi_pkg::NO_LATENCY, // CUT_ALL_AX | axi_pkg::DemuxW, PipelineStages: 0, AxiIdWidthSlvPorts: AXI_ID_IN_WIDTH, @@ -163,7 +163,6 @@ module cluster_bus_wrap NoAddrRules: N_RULES }; - axi_xbar #( .Cfg ( AXI_XBAR_CFG ), .ATOPs ( 1'b0 ), diff --git a/rtl/cluster_peripherals.sv b/rtl/cluster_peripherals.sv index 36e3d520..b380abb5 100644 --- a/rtl/cluster_peripherals.sv +++ b/rtl/cluster_peripherals.sv @@ -102,6 +102,7 @@ module cluster_peripherals input logic [NB_CORES-1:0][3:0] hwpe_events_i, output logic hwpe_en_o, output logic [$clog2(NB_HWPES)-1:0] hwpe_sel_o, + output logic idma_en_o, output hci_package::hci_interconnect_ctrl_t hci_ctrl_o, // Control ports @@ -192,6 +193,8 @@ module cluster_peripherals .hwpe_sel_o ( hwpe_sel_o ), .hci_ctrl_o ( hci_ctrl_o ), + .idma_en_o ( idma_en_o ), + .fregfile_disable_o ( fregfile_disable_o ), diff --git a/rtl/core_demux_wrap.sv b/rtl/core_demux_wrap.sv index c509a242..60d5893b 100644 --- a/rtl/core_demux_wrap.sv +++ b/rtl/core_demux_wrap.sv @@ -1,19 +1,19 @@ module core_demux_wrap #( - parameter int unsigned AddrWidth = 32 , - parameter int unsigned DataWidth = 32 , - parameter int unsigned RemapAddress = 1 , - parameter int unsigned ClustAlias = 1 , - parameter int unsigned ClustAliasBase = 12'h000 , - parameter int unsigned NumExtPerf = 5 , - parameter type core_data_req_t = logic , - parameter type core_data_rsp_t = logic , + parameter int unsigned AddrWidth = 32 , + parameter int unsigned DataWidth = 32 , 
+ parameter int unsigned RemapAddress = 1 , + parameter int unsigned ClustAlias = 1 , + parameter int unsigned ClustAliasBase = 12'h000 , + parameter int unsigned ClustBaseAddr = 32'h10000000, + parameter int unsigned NumExtPerf = 5 , + parameter type core_data_req_t = logic , + parameter type core_data_rsp_t = logic , localparam int unsigned ByteEnable = DataWidth/8 )( input logic clk_i , input logic rst_ni , input logic test_en_i , input logic clk_en_i , - input logic [3:0] base_addr_i , output logic [NumExtPerf-1:0] ext_perf_o , input core_data_req_t core_data_req_i , output core_data_rsp_t core_data_rsp_o , @@ -60,12 +60,12 @@ data_periph_demux #( .BYTE_ENABLE_BIT ( ByteEnable ), .REMAP_ADDRESS ( RemapAddress ), .CLUSTER_ALIAS ( ClustAlias ), - .CLUSTER_ALIAS_BASE ( ClustAliasBase ) + .CLUSTER_ALIAS_BASE ( ClustAliasBase ), + .CLUSTER_BASE_ADDR ( ClustBaseAddr ) ) data_periph_demux_i ( .clk ( clk_i ), .rst_ni ( rst_ni ), .test_en_i ( test_en_i ), - .base_addr_i ( base_addr_i ), .data_req_i ( core_data_req_i.req ), .data_add_i ( core_data_req_i.add ), .data_wen_i ( ~core_data_req_i.we ), //inverted when using OR10N diff --git a/rtl/core_region.sv b/rtl/core_region.sv index 6522ce7d..50813b3f 100644 --- a/rtl/core_region.sv +++ b/rtl/core_region.sv @@ -47,6 +47,7 @@ import rapid_recovery_pkg::*; parameter int unsigned FP_DIVSQRT = 0, parameter int unsigned DEBUG_START_ADDR = 32'h1A110000, + parameter int unsigned CLUSTER_BASE = 32'h10000000, parameter type core_data_req_t = logic, parameter type core_data_rsp_t = logic, @@ -61,12 +62,12 @@ import rapid_recovery_pkg::*; input logic [3:0] core_id_i, input logic [5:0] cluster_id_i, - + input logic irq_req_i, output logic irq_ack_o, input logic [4:0] irq_id_i, output logic [4:0] irq_ack_id_o, - + input logic clock_en_i, input logic fetch_en_i, @@ -349,7 +350,7 @@ import rapid_recovery_pkg::*; assign boot_addr = boot_addr_i & 32'hFFFFFF00; // RI5CY expects 0x80 offset, Ibex expects 0x00 offset (adds reset offset 0x80 
internally) // Core busy assign core_busy_o = ~core_sleep; - + if (INSTR_RDATA_WIDTH == 128) begin instr_width_converter ibex_width_converter ( .clk_i ( clk_i ), @@ -381,7 +382,7 @@ import rapid_recovery_pkg::*; assign core_instr_r_rdata = instr_r_rdata_i; assign core_instr_r_valid = instr_r_valid_i; end - + obi_pulp_adapter i_obi_pulp_adapter_mem ( .clk_i (clk_i ), .rst_ni (rst_ni ), @@ -482,12 +483,12 @@ import rapid_recovery_pkg::*; always @(posedge clk_i) begin : CHECK_ASSERTIONS `ifndef CLUSTER_ALIAS - if ((core_data_req_o.req == 1'b1) && (core_data_req_o.add < 32'h1000_0000)) begin - $error("ERROR_1 (0x00000000 -> 0x10000000) : Data interface is making a request on unmapped region --> %8x\t at time %t [ns]" ,core_data_req_o.add, $time()/1000 ); + if ((core_data_req_o.req == 1'b1) && (core_data_req_o.add < CLUSTER_BASE)) begin + $error("ERROR_1 (0x00000000 -> 0x%8x) : Data interface is making a request on unmapped region --> %8x\t at time %t [ns]" , CLUSTER_BASE, core_data_req_o.add, $time()/1000 ); $finish(); end - if ((core_data_req_o.req == 1'b1) && (core_data_req_o.add >= 32'h1040_0000) && ((core_data_req_o.add < 32'h1A00_0000))) begin - $error("ERROR_2 (0x10400000 -> 0x1A000000) : Data interface is making a request on unmapped region --> %8x\t at time %t [ns]" ,core_data_req_o.add, $time()/1000 ); + if ((core_data_req_o.req == 1'b1) && (core_data_req_o.add >= (CLUSTER_BASE + 32'h0040_0000)) && ((core_data_req_o.add < (CLUSTER_BASE + 32'h0A00_0000)))) begin + $error("ERROR_2 (0x%8x -> 0x%8x) : Data interface is making a request on unmapped region --> %8x\t at time %t [ns]" , CLUSTER_BASE + 32'h0040_0000, CLUSTER_BASE + 32'h0A00_0000, core_data_req_o.add, $time()/1000 ); $finish(); end `endif @@ -514,6 +515,8 @@ import rapid_recovery_pkg::*; FILE=$fopen(FILENAME,"w"); end + //FIXME: remove commented code + // BOOT code is loaded in this dummy ROM_MEMORY /* -----\/----- EXCLUDED -----\/----- generate diff --git a/rtl/data_periph_demux.sv 
b/rtl/data_periph_demux.sv index a89d0162..4373019a 100644 --- a/rtl/data_periph_demux.sv +++ b/rtl/data_periph_demux.sv @@ -4,7 +4,7 @@ * * This code is under development and not yet released to the public. * Until it is released, the code is under the copyright of ETH Zurich and - * the University of Bologna, and may contain confidential and/or unpublished + * the University of Bologna, and may contain confidential and/or unpublished * work. Any reuse/redistribution is strictly forbidden without written * permission from ETH Zurich. * @@ -22,13 +22,13 @@ module data_periph_demux parameter int unsigned BYTE_ENABLE_BIT = DATA_WIDTH/8, parameter int unsigned REMAP_ADDRESS = 0, parameter int unsigned CLUSTER_ALIAS = 1, - parameter int unsigned CLUSTER_ALIAS_BASE = 12'h000 + parameter int unsigned CLUSTER_ALIAS_BASE = 12'h000, + parameter int unsigned CLUSTER_BASE_ADDR = 32'h10000000 ) ( input logic clk, input logic rst_ni, input logic test_en_i, - input logic [3:0] base_addr_i, // CORE SIDE input logic data_req_i, @@ -62,7 +62,7 @@ module data_periph_demux input logic data_r_valid_i_EXT, input logic [DATA_WIDTH - 1:0] data_r_rdata_i_EXT, input logic data_r_opc_i_EXT, - + // Peripheral interconnect SIDE output logic data_req_o_PE, output logic [ADDR_WIDTH - 1:0] data_add_o_PE, @@ -73,19 +73,20 @@ module data_periph_demux input logic data_r_valid_i_PE, input logic data_r_opc_i_PE, input logic [DATA_WIDTH - 1:0] data_r_rdata_i_PE, - + // Performance Counters output logic perf_l2_ld_o, // nr of L2 loads output logic perf_l2_st_o, // nr of L2 stores output logic perf_l2_ld_cyc_o, // cycles used for L2 loads output logic perf_l2_st_cyc_o // cycles used for L2 stores ); - + logic [10:0] CLUSTER_ALIAS_BASE_11; logic [11:0] CLUSTER_ALIAS_BASE_12; - + localparam logic [3:0] BASE_ADDR = CLUSTER_BASE_ADDR[ADDR_WIDTH-1:ADDR_WIDTH-4]; + logic s_data_req_PE; - logic s_data_gnt_PE; + logic s_data_gnt_PE; logic [DATA_WIDTH - 1:0] s_data_r_data_PE; logic s_data_r_valid_PE; logic 
s_data_r_opc_PE; @@ -102,12 +103,12 @@ module data_periph_demux logic [DATA_WIDTH - 1:0] data_wdata_to_L2; logic [BYTE_ENABLE_BIT - 1:0] data_be_to_L2; logic data_gnt_from_L2; - + enum logic [1:0] {SH, PE, EXT } request_destination, destination; - + logic [ADDR_WIDTH - 1:0] data_add_int; - + // Signal to PERIPH FIFO logic data_busy_PE_fifo; logic data_req_PE_fifo; @@ -116,7 +117,7 @@ module data_periph_demux logic [DATA_WIDTH - 1:0] data_wdata_PE_fifo; logic [BYTE_ENABLE_BIT - 1:0] data_be_PE_fifo; logic data_gnt_PE_fifo; - + logic data_r_valid_PE_fifo; logic data_r_opc_PE_fifo; logic [DATA_WIDTH - 1:0] data_r_rdata_PE_fifo; @@ -133,17 +134,17 @@ module data_periph_demux always_comb begin - TCDM_RW = {base_addr_i, 8'h00} + 0; - TCDM_TS = {base_addr_i, 8'h00} + 1; - DEM_PER = {base_addr_i, 8'h00} + 2; + TCDM_RW = {BASE_ADDR, 8'h00} + 0; + TCDM_TS = {BASE_ADDR, 8'h00} + 1; + DEM_PER = {BASE_ADDR, 8'h00} + 2; end - - - - // This section is used to swap the 4 most significant bits of the address - // with the ones that are provided by the base_addr_i - // If data_add_i[31:28] == base_addr_i then data_add_i[31:28] are changed in 4'b0001 - // If data_add_i[31:28] == 4'b0001 --> then th data_add_i[31:28] is changed in base_addr_i + + + + // This section is used to swap the 4 most significant bits of the address + // with the ones that are provided by BASE_ADDR + // If data_add_i[31:28] == BASE_ADDR then data_add_i[31:28] is changed to 4'b0001 + // If data_add_i[31:28] == 4'b0001 --> then data_add_i[31:28] is changed to BASE_ADDR // In the other cases, the address is unchanged assign data_add_int[27:0] = data_add_i[27:0]; @@ -151,13 +152,13 @@ module data_periph_demux if (REMAP_ADDRESS == 1) begin always_comb begin - if(data_add_i[31:28] == base_addr_i) + if(data_add_i[31:28] == BASE_ADDR) begin data_add_int[31:28] = 4'b0001; end else if(data_add_int[31:28] == 4'b0001) begin - data_add_int[31:28] = base_addr_i; + data_add_int[31:28] = BASE_ADDR; end else begin @@ 
-165,7 +166,7 @@ if (REMAP_ADDRESS == 1) begin end end end else begin - assign data_add_int[31:28] = data_add_i[31:28]; + assign data_add_int[31:28] = data_add_i[31:28]; end //******************************************************** @@ -179,7 +180,7 @@ end assign data_add_to_L2 = data_add_int; assign data_wen_to_L2 = data_wen_i; assign data_wdata_to_L2 = data_wdata_i; - assign data_be_to_L2 = data_be_i; + assign data_be_to_L2 = data_be_i; always_ff @(posedge clk, negedge rst_ni) begin : _UPDATE_RESPONSE_DESTINATION_ @@ -196,42 +197,42 @@ end `ifdef DEM_PER_BEFORE_TCDM_TS - TCDM_RW `ifdef CLUSTER_ALIAS , CLUSTER_ALIAS_BASE `endif : + TCDM_RW `ifdef CLUSTER_ALIAS , CLUSTER_ALIAS_BASE `endif : begin if(data_add_int[19:14] == 6'b11_1111) request_destination <= EXT; - else + else request_destination <= SH; end // CLUSTER or DEM peripherals (mappping based on Germain suggestion) - TCDM_TS `ifdef CLUSTER_ALIAS , (CLUSTER_ALIAS_BASE+1) `endif: + TCDM_TS `ifdef CLUSTER_ALIAS , (CLUSTER_ALIAS_BASE+1) `endif: begin request_destination <= SH; - end - - DEM_PER `ifdef CLUSTER_ALIAS , (CLUSTER_ALIAS_BASE+2) `endif: + end + + DEM_PER `ifdef CLUSTER_ALIAS , (CLUSTER_ALIAS_BASE+2) `endif: begin request_destination <= PE; end `else - - TCDM_RW, TCDM_TS `ifdef CLUSTER_ALIAS , CLUSTER_ALIAS_BASE, (CLUSTER_ALIAS_BASE+1) `endif : - begin - request_destination <= SH; + + TCDM_RW, TCDM_TS `ifdef CLUSTER_ALIAS , CLUSTER_ALIAS_BASE, (CLUSTER_ALIAS_BASE+1) `endif : + begin + request_destination <= SH; end // CLUSTER - DEM_PER `ifdef CLUSTER_ALIAS , (CLUSTER_ALIAS_BASE+2) `endif: + DEM_PER `ifdef CLUSTER_ALIAS , (CLUSTER_ALIAS_BASE+2) `endif: begin if(data_add_int[14]) // DEMUX PERIPHERALS - request_destination <= EXT; + request_destination <= EXT; else request_destination <= PE; end `endif - default: - begin - request_destination <= PE; + default: + begin + request_destination <= PE; end // CLUSTER PERIPHERAL and REst of the memory map endcase @@ -241,27 +242,27 @@ end end end - + // USED 
FOR THE PE FSM always_comb begin : _UPDATE_REQUEST_DESTINATION_ `ifdef DEM_PER_BEFORE_TCDM_TS case(data_add_int[31:20]) - TCDM_RW `ifdef CLUSTER_ALIAS , CLUSTER_ALIAS_BASE `endif : + TCDM_RW `ifdef CLUSTER_ALIAS , CLUSTER_ALIAS_BASE `endif : begin if(data_add_int[19:14] == 6'b11_1111) destination = EXT; - else + else destination = SH; end // CLUSTER or DEM peripherals (mappping based on Germain suggestion) - TCDM_TS `ifdef CLUSTER_ALIAS , (CLUSTER_ALIAS_BASE+1) `endif: + TCDM_TS `ifdef CLUSTER_ALIAS , (CLUSTER_ALIAS_BASE+1) `endif: begin destination = SH; - end - - DEM_PER `ifdef CLUSTER_ALIAS , (CLUSTER_ALIAS_BASE+2) `endif: + end + + DEM_PER `ifdef CLUSTER_ALIAS , (CLUSTER_ALIAS_BASE+2) `endif: begin destination = PE; end @@ -269,33 +270,33 @@ end endcase `else case(data_add_int[31:20]) - TCDM_RW, TCDM_TS `ifdef CLUSTER_ALIAS , CLUSTER_ALIAS_BASE, (CLUSTER_ALIAS_BASE+1) `endif : - begin + TCDM_RW, TCDM_TS `ifdef CLUSTER_ALIAS , CLUSTER_ALIAS_BASE, (CLUSTER_ALIAS_BASE+1) `endif : + begin destination = SH; // CLUSTER end - DEM_PER `ifdef CLUSTER_ALIAS , (CLUSTER_ALIAS_BASE+2) `endif: + DEM_PER `ifdef CLUSTER_ALIAS , (CLUSTER_ALIAS_BASE+2) `endif: begin if(data_add_int[14]) // DEMUX PERIPHERALS - destination = EXT; + destination = EXT; else - destination = PE; + destination = PE; end // DEMUX PERIPHERALS default: begin destination = PE; end // CLUSTER PERIPHERAL and REst of the memory map endcase `endif - end - - + end + + always_comb - begin : L1_REQUEST_ARBITER + begin : L1_REQUEST_ARBITER `ifdef DEM_PER_BEFORE_TCDM_TS `ifdef CLUSTER_ALIAS if( ( data_add_int[31:21] == TCDM_RW[11:1]) || ( data_add_int[31:21] == CLUSTER_ALIAS_BASE_11 ) ) //LOGARITHMIC INTERCONNECT --> 31:20 --> 0x100 or 0x101 or ALIAS (0x000 or 0x001) or DEM PERIPH - `else + `else if(data_add_int[31:21] == TCDM_RW[11:1]) //LOGARITHMIC INTERCONNECT --> 31:20 --> 0x100 or 0x101 or DEM PERIPH `endif begin : _TO_DEM_PER_L2_ @@ -312,7 +313,7 @@ end data_gnt_o = data_gnt_i_SH; end end - else + else begin 
: _TO_L2_LEVEL_ data_req_o_SH = 1'b0; data_req_to_L2 = data_req_i; @@ -323,7 +324,7 @@ end `ifdef CLUSTER_ALIAS if( ( data_add_int[31:21] == TCDM_RW[11:1]) || ( data_add_int[31:21] == CLUSTER_ALIAS_BASE_11) ) //LOGARITHMIC INTERCONNECT --> 31:20 --> 0x100 or 0x101 or ALIAS (0x000 or 0x001) - `else + `else if(data_add_int[31:21] == TCDM_RW[11:1]) //LOGARITHMIC INTERCONNECT --> 31:20 --> 0x100 or 0x101 `endif begin : _TO_CLUSTER_ @@ -331,7 +332,7 @@ end data_req_to_L2 = 1'b0; data_gnt_o = data_gnt_i_SH; end - else + else begin : _TO_L2_LEVEL_ data_req_o_SH = 1'b0; data_req_to_L2 = data_req_i; @@ -339,11 +340,11 @@ end end `endif end - - - - - + + + + + //******************************************************** //************** LEVEL 2 REQUEST ARBITER ***************** //******************************************************** @@ -363,17 +364,17 @@ end `ifdef DEM_PER_BEFORE_TCDM_TS `ifdef CLUSTER_ALIAS - if ( ((data_add_int[31:20] == TCDM_RW ) || (data_add_int[31:20] == CLUSTER_ALIAS) ) && (data_add_int[19:14] == 6'b11_1111) ) + if ( ((data_add_int[31:20] == TCDM_RW ) || (data_add_int[31:20] == CLUSTER_ALIAS) ) && (data_add_int[19:14] == 6'b11_1111) ) `else - if ( (data_add_int[31:20] == DEM_PER ) && (data_add_int[19:14] == 6'b11_1111) ) + if ( (data_add_int[31:20] == DEM_PER ) && (data_add_int[19:14] == 6'b11_1111) ) `endif begin : _TO_DEMUX_PERIPH_ //Peripheral --> add_i[31:0] --> 0x100F_FC00 to 0x100F_FFFF data_req_PE_fifo = 1'b0; data_req_o_EXT = data_req_to_L2; data_gnt_from_L2 = data_gnt_i_EXT; end - else - begin : _TO_PERIPHERAL_INTERCO_ + else + begin : _TO_PERIPHERAL_INTERCO_ data_req_PE_fifo = s_data_req_PE; data_req_o_EXT = 1'b0; data_gnt_from_L2 = s_data_gnt_PE; @@ -382,17 +383,17 @@ end `else `ifdef CLUSTER_ALIAS - if ( ((data_add_int[31:20] == DEM_PER ) || (data_add_int[31:20] == (CLUSTER_ALIAS_BASE+2)) ) && (data_add_int[14] == 1'b1) ) + if ( ((data_add_int[31:20] == DEM_PER ) || (data_add_int[31:20] == (CLUSTER_ALIAS_BASE+2)) ) && (data_add_int[14] == 
1'b1) ) `else - if ( (data_add_int[31:20] == DEM_PER ) && (data_add_int[14] == 1'b1) ) + if ( (data_add_int[31:20] == DEM_PER ) && (data_add_int[14] == 1'b1) ) `endif begin : _TO_DEMUX_PERIPH_ //Peripheral --> add_i[31:0] --> 0x1020_4000 to 0x1020_7FFF data_req_PE_fifo = 1'b0; data_req_o_EXT = data_req_to_L2; data_gnt_from_L2 = data_gnt_i_EXT; end - else - begin : _TO_PERIPHERAL_INTERCO_ + else + begin : _TO_PERIPHERAL_INTERCO_ data_req_PE_fifo = s_data_req_PE; data_req_o_EXT = 1'b0; data_gnt_from_L2 = s_data_gnt_PE; @@ -408,7 +409,7 @@ end always_comb begin: _RESPONSE_ARBITER_ case(request_destination) - SH: + SH: begin data_r_valid_o = data_r_valid_i_SH; data_r_rdata_o = data_r_rdata_i_SH; @@ -605,21 +606,21 @@ logic [31:0] STALL_L2; logic clear_regs, enable_regs; - always_ff @(posedge clk or negedge rst_ni) + always_ff @(posedge clk or negedge rst_ni) begin - if(~rst_ni) + if(~rst_ni) begin STALL_TCDM <= '0; STALL_L2 <= '0; - end + end else begin if(clear_regs) - begin + begin STALL_TCDM <= '0; STALL_L2 <= '0; end - else + else if( enable_regs ) begin if( data_req_o_SH & ~data_gnt_i_SH ) diff --git a/rtl/idma_wrap.sv b/rtl/idma_wrap.sv index ab4954b0..f933c0c1 100644 --- a/rtl/idma_wrap.sv +++ b/rtl/idma_wrap.sv @@ -6,68 +6,81 @@ * dmac_wrap.sv * Thomas Benz * Michael Rogenmoser + * Georg Rutishauser */ // DMA Core wrapper `include "axi/assign.svh" `include "axi/typedef.svh" +`include "obi/typedef.svh" `include "idma/typedef.svh" `include "register_interface/typedef.svh" +`define MY_MAX(a, b) (a > b ? 
a : b) + module dmac_wrap #( - parameter int unsigned NB_CORES = 4, - parameter int unsigned AXI_ADDR_WIDTH = 32, - parameter int unsigned AXI_DATA_WIDTH = 64, - parameter int unsigned AXI_USER_WIDTH = 6, - parameter int unsigned AXI_ID_WIDTH = 4, - parameter int unsigned PE_ID_WIDTH = 1, - parameter int unsigned NB_PE_PORTS = 1, - parameter int unsigned DATA_WIDTH = 32, - parameter int unsigned ADDR_WIDTH = 32, - parameter int unsigned BE_WIDTH = DATA_WIDTH/8, - parameter int unsigned NUM_STREAMS = 1, // Only 1 for now - parameter int unsigned TCDM_SIZE = 0, - parameter int unsigned TwoDMidend = 1, // Leave this on for now - parameter int unsigned NB_OUTSND_BURSTS = 8, - parameter int unsigned GLOBAL_QUEUE_DEPTH = 16, - parameter int unsigned BACKEND_QUEUE_DEPTH = 16, - parameter logic [AXI_ADDR_WIDTH-1:0] ClusterBaseAddr = 'h10000000, - parameter type axi_req_t = logic, - parameter type axi_resp_t = logic -) ( - input logic clk_i, - input logic rst_ni, - input logic test_mode_i, - XBAR_PERIPH_BUS.Slave pe_ctrl_slave[NB_PE_PORTS-1:0], - hci_core_intf.target ctrl_slave[0:NB_CORES-1], - hci_core_intf.initiator tcdm_master[0:3], - output axi_req_t ext_master_req_o, - input axi_resp_t ext_master_resp_i, - output logic [NB_CORES-1:0] term_event_o, - output logic [NB_CORES-1:0] term_irq_o, - output logic [NB_PE_PORTS-1:0] term_event_pe_o, - output logic [NB_PE_PORTS-1:0] term_irq_pe_o, - output logic busy_o -); - - localparam int unsigned NumRegs = NB_CORES+NB_PE_PORTS; - localparam int unsigned MstIdxWidth = AXI_ID_WIDTH; - localparam int unsigned SlvIdxWidth = AXI_ID_WIDTH - $clog2(NUM_STREAMS); + parameter int unsigned NB_CORES = 4, + parameter int unsigned AXI_ADDR_WIDTH = 32, + parameter int unsigned AXI_DATA_WIDTH = 32, + parameter int unsigned AXI_USER_WIDTH = 6, + parameter int unsigned AXI_ID_WIDTH = 4, + parameter int unsigned PE_ID_WIDTH = 1, + parameter int unsigned NB_PE_PORTS = 1, + parameter int unsigned DATA_WIDTH = 32, + parameter int unsigned 
ADDR_WIDTH = 32, + parameter int unsigned BE_WIDTH = DATA_WIDTH / 8, + parameter int unsigned IDMA_BURST_LENGTH = 5, + parameter type axi_req_t = logic, + parameter type axi_resp_t = logic, + // bidirectional streams: range 1 to 8 + parameter int unsigned NUM_BIDIR_STREAMS = 1, + parameter int unsigned NB_OUTSND_BURSTS = 8, + // queue depth per stream + parameter int unsigned GLOBAL_QUEUE_DEPTH = 2, + // mux read ports between tcdm-tcdm and tcdm-axi?BeWidth + parameter bit MUX_READ = 1'b0, + parameter bit TCDM_MEM2BANKS = 1'b0, + // when using mem2banks (implies AXI_DATA_WIDTH==64): + // 4 ports per stream if read ports muxed, otherwise 6 + // when not using mem2banks: + // 2 ports per stream if read ports muxed, otherwise 3 + localparam int unsigned NB_TCDM_PORTS_PER_STRM = (2 + (!MUX_READ)) * (1 + TCDM_MEM2BANKS) +) ( // verilog_format: off // verible does not manage to align this :( + input logic clk_i, + input logic rst_ni, + input logic test_mode_i, + XBAR_PERIPH_BUS.Slave pe_ctrl_slave[NB_PE_PORTS-1:0], + hci_core_intf.target ctrl_slave[0:NB_CORES-1], + hci_core_intf.initiator tcdm_master[0:3], + output axi_req_t [NUM_BIDIR_STREAMS-1:0] ext_master_req_o, + input axi_resp_t [NUM_BIDIR_STREAMS-1:0] ext_master_resp_i, + output logic [NB_CORES-1:0] term_event_o, + output logic [NB_CORES-1:0] term_irq_o, + output logic [NB_PE_PORTS-1:0] term_event_pe_o, + output logic [NB_PE_PORTS-1:0] term_irq_pe_o, + output logic busy_o +); // verilog_format: on + + localparam int unsigned NumRegs = NB_CORES + NB_PE_PORTS; + localparam int unsigned NumStreams = 32'd2 * NUM_BIDIR_STREAMS; + localparam int unsigned StreamWidth = cf_math_pkg::idx_width(NumStreams); // CORE --> MCHAN CTRL INTERFACE BUS SIGNALS - logic [NumRegs-1:0][DATA_WIDTH-1:0] config_wdata; - logic [NumRegs-1:0][ADDR_WIDTH-1:0] config_add; + logic [NumRegs-1:0][ DATA_WIDTH-1:0] config_wdata; + logic [NumRegs-1:0][ ADDR_WIDTH-1:0] config_add; logic [NumRegs-1:0] config_req; logic [NumRegs-1:0] config_wen; - logic 
[NumRegs-1:0][BE_WIDTH-1:0] config_be; + logic [NumRegs-1:0][ BE_WIDTH-1:0] config_be; logic [NumRegs-1:0][PE_ID_WIDTH-1:0] config_id; logic [NumRegs-1:0] config_gnt; - logic [NumRegs-1:0][DATA_WIDTH-1:0] config_r_rdata; + logic [NumRegs-1:0][ DATA_WIDTH-1:0] config_r_rdata; logic [NumRegs-1:0] config_r_valid; logic [NumRegs-1:0] config_r_opc; logic [NumRegs-1:0][PE_ID_WIDTH-1:0] config_r_id; + logic datapath_clk_gated; + // tie-off pe control ports for (genvar i = 0; i < NB_CORES; i++) begin : gen_ctrl_registers assign config_add[i] = ctrl_slave[i].add; @@ -79,16 +92,16 @@ module dmac_wrap #( assign ctrl_slave[i].gnt = config_gnt[i]; assign ctrl_slave[i].r_opc = config_r_opc[i]; assign ctrl_slave[i].r_valid = config_r_valid[i]; - assign ctrl_slave[i].r_data = config_r_rdata[i]; + assign ctrl_slave[i].r_data = config_r_rdata[i]; end for (genvar i = 0; i < NB_PE_PORTS; i++) begin : gen_pe_ctrl_registers - assign config_add[NB_CORES+i] = pe_ctrl_slave[i].add; - assign config_req[NB_CORES+i] = pe_ctrl_slave[i].req; - assign config_wdata[NB_CORES+i] = pe_ctrl_slave[i].wdata; - assign config_wen[NB_CORES+i] = pe_ctrl_slave[i].wen; - assign config_be[NB_CORES+i] = pe_ctrl_slave[i].be; - assign config_id[NB_CORES+i] = pe_ctrl_slave[i].id; + assign config_add[NB_CORES+i] = pe_ctrl_slave[i].add; + assign config_req[NB_CORES+i] = pe_ctrl_slave[i].req; + assign config_wdata[NB_CORES+i] = pe_ctrl_slave[i].wdata; + assign config_wen[NB_CORES+i] = pe_ctrl_slave[i].wen; + assign config_be[NB_CORES+i] = pe_ctrl_slave[i].be; + assign config_id[NB_CORES+i] = pe_ctrl_slave[i].id; assign pe_ctrl_slave[i].gnt = config_gnt[NB_CORES+i]; assign pe_ctrl_slave[i].r_opc = config_r_opc[NB_CORES+i]; assign pe_ctrl_slave[i].r_valid = config_r_valid[NB_CORES+i]; @@ -96,74 +109,198 @@ module dmac_wrap #( assign pe_ctrl_slave[i].r_id = config_r_id[NB_CORES+i]; end - // AXI4+ATOP types - typedef logic [AXI_ADDR_WIDTH-1:0] addr_t; - typedef logic [ADDR_WIDTH-1:0] mem_addr_t; - typedef logic 
[AXI_DATA_WIDTH-1:0] data_t; - typedef logic [SlvIdxWidth-1:0] slv_id_t; - typedef logic [MstIdxWidth-1:0] mst_id_t; + // Types types + typedef logic [AXI_ADDR_WIDTH-1:0] addr_t; + typedef logic [ADDR_WIDTH-1:0] mem_addr_t; + typedef logic [AXI_DATA_WIDTH-1:0] data_t; + typedef logic [AXI_ID_WIDTH-1:0] id_t; typedef logic [AXI_DATA_WIDTH/8-1:0] strb_t; typedef logic [AXI_USER_WIDTH-1:0] user_t; - // AXI4+ATOP channels typedefs - `AXI_TYPEDEF_AW_CHAN_T(slv_aw_chan_t, addr_t, slv_id_t, user_t) - `AXI_TYPEDEF_AW_CHAN_T(mst_aw_chan_t, addr_t, mst_id_t, user_t) - `AXI_TYPEDEF_AW_CHAN_T(mem_aw_chan_t, mem_addr_t, mst_id_t, user_t) - `AXI_TYPEDEF_W_CHAN_T(w_chan_t, data_t, strb_t, user_t) - `AXI_TYPEDEF_B_CHAN_T(slv_b_chan_t, slv_id_t, user_t) - `AXI_TYPEDEF_B_CHAN_T(mst_b_chan_t, mst_id_t, user_t) - `AXI_TYPEDEF_AR_CHAN_T(slv_ar_chan_t, addr_t, slv_id_t, user_t) - `AXI_TYPEDEF_AR_CHAN_T(mst_ar_chan_t, addr_t, mst_id_t, user_t) - `AXI_TYPEDEF_AR_CHAN_T(mem_ar_chan_t, mem_addr_t, mst_id_t, user_t) - `AXI_TYPEDEF_R_CHAN_T(slv_r_chan_t, data_t, slv_id_t, user_t) - `AXI_TYPEDEF_R_CHAN_T(mst_r_chan_t, data_t, mst_id_t, user_t) - `AXI_TYPEDEF_REQ_T(slv_req_t, slv_aw_chan_t, w_chan_t, slv_ar_chan_t) - `AXI_TYPEDEF_REQ_T(mst_req_t, mst_aw_chan_t, w_chan_t, mst_ar_chan_t) - `AXI_TYPEDEF_REQ_T(mem_req_t, mem_aw_chan_t, w_chan_t, mem_ar_chan_t) - `AXI_TYPEDEF_RESP_T(slv_resp_t, slv_b_chan_t, slv_r_chan_t) - `AXI_TYPEDEF_RESP_T(mst_resp_t, mst_b_chan_t, mst_r_chan_t) + + // // AXI4+ATOP channels typedefs + //`AXI_TYPEDEF_ALL(axi_int, addr_t, id_t, data_t, strb_t, user_t) + `AXI_TYPEDEF_AW_CHAN_T(axi_aw_chan_t, addr_t, id_t, user_t) + `AXI_TYPEDEF_W_CHAN_T(axi_w_chan_t, data_t, strb_t, user_t) + `AXI_TYPEDEF_B_CHAN_T(axi_b_chan_t, id_t, user_t) + `AXI_TYPEDEF_AR_CHAN_T(axi_ar_chan_t, addr_t, id_t, user_t) + `AXI_TYPEDEF_R_CHAN_T(axi_r_chan_t, data_t, id_t, user_t) + // Memory Init typedefs + /// init read request + typedef struct packed { + logic [AXI_ADDR_WIDTH-1:0] cfg; + logic 
[AXI_DATA_WIDTH-1:0] term; + logic [AXI_DATA_WIDTH/8-1:0] strb; + logic [AXI_ID_WIDTH-1:0] id; + } init_req_chan_t; + + typedef struct packed { + init_req_chan_t req_chan; + logic req_valid; + logic rsp_ready; + } init_req_t; + + typedef struct packed {logic [AXI_DATA_WIDTH-1:0] init;} init_rsp_chan_t; + + typedef struct packed { + init_rsp_chan_t rsp_chan; + logic rsp_valid; + logic req_ready; + } init_rsp_t; + + // OBI typedefs (ADDR_WIDTH) + `OBI_TYPEDEF_MINIMAL_A_OPTIONAL(a_optional_t) + `OBI_TYPEDEF_MINIMAL_R_OPTIONAL(r_optional_t) + `OBI_TYPEDEF_A_CHAN_T(obi_a_chan_t, ADDR_WIDTH, AXI_DATA_WIDTH, AXI_ID_WIDTH, a_optional_t) + `OBI_TYPEDEF_R_CHAN_T(obi_r_chan_t, AXI_DATA_WIDTH, AXI_ID_WIDTH, r_optional_t) + `OBI_TYPEDEF_REQ_T(obi_req_t, obi_a_chan_t) + `OBI_TYPEDEF_RSP_T(obi_rsp_t, obi_r_chan_t) + + obi_req_t [NUM_BIDIR_STREAMS-1:0] + obi_read_req_from_dma, + obi_read_req_from_rrc, + obi_reorg_req_from_dma, + obi_reorg_req_from_rrc, + obi_write_req_from_dma, + obi_write_req_from_rrc, + obi_read_req_muxed; + obi_rsp_t [NUM_BIDIR_STREAMS-1:0] + obi_read_rsp_to_dma, + obi_read_rsp_to_rrc, + obi_reorg_rsp_to_dma, + obi_reorg_rsp_to_rrc, + obi_write_rsp_to_dma, + obi_write_rsp_to_rrc, + obi_read_rsp_to_mux; + + // OBI typedefs (AXI_ADDR_WIDTH) + `OBI_TYPEDEF_A_CHAN_T(obi2axi_a_chan_t, AXI_ADDR_WIDTH, AXI_DATA_WIDTH, AXI_ID_WIDTH, a_optional_t) + `OBI_TYPEDEF_REQ_T(obi2axi_req_t, obi2axi_a_chan_t) + + obi2axi_req_t [NUM_BIDIR_STREAMS-1:0] + obi2axi_read_req_from_dma, + obi2axi_reorg_req_from_dma, + obi2axi_write_req_from_dma; + // BUS definitions - mst_req_t tcdm_req, soc_req; - mem_req_t tcdm_mem_req; - mst_resp_t soc_rsp; - mst_resp_t tcdm_rsp; - slv_req_t [NUM_STREAMS-1:0] dma_req; - slv_resp_t [NUM_STREAMS-1:0] dma_rsp; + axi_req_t [NUM_BIDIR_STREAMS-1:0] soc_req; + axi_resp_t [NUM_BIDIR_STREAMS-1:0] soc_rsp; + axi_req_t [ NumStreams-1:0] dma_req; + axi_resp_t [ NumStreams-1:0] dma_rsp; + // interface to structs - //`AXI_ASSIGN_FROM_REQ(ext_master_req_o, soc_req) 
- //`AXI_ASSIGN_TO_RESP(soc_rsp, ext_master_resp_i) + for (genvar s = 0; s < NUM_BIDIR_STREAMS; s++) begin : gen_connect_interface + assign ext_master_req_o[s] = soc_req[s]; + assign soc_rsp[s] = ext_master_resp_i[s]; + end + + // connect RW axi buses + for (genvar s = 0; s < NUM_BIDIR_STREAMS; s++) begin : gen_rw_axi_connection + + assign dma_req[2*s].ar = '0; + assign dma_req[2*s].ar_valid = '0; + assign dma_req[2*s].r_ready = '0; + + assign dma_req[2*s+1].aw = '0; + assign dma_req[2*s+1].aw_valid = '0; + assign dma_req[2*s+1].w = '0; + assign dma_req[2*s+1].w_valid = '0; + assign dma_req[2*s+1].b_ready = '0; - `AXI_ASSIGN_REQ_STRUCT(ext_master_req_o, soc_req) - `AXI_ASSIGN_RESP_STRUCT(soc_rsp, ext_master_resp_i) + axi_rw_join #( + .axi_req_t (axi_req_t), + .axi_resp_t(axi_resp_t) + ) i_init_axi_rw_join ( + .clk_i ( datapath_clk_gated ), + .rst_ni, + .slv_read_req_i (dma_req[2*s+1]), + .slv_read_resp_o (dma_rsp[2*s+1]), + .slv_write_req_i (dma_req[2*s]), + .slv_write_resp_o(dma_rsp[2*s]), + .mst_req_o (soc_req[s]), + .mst_resp_i (soc_rsp[s]) + ); + end // Register BUS definitions - `REG_BUS_TYPEDEF_ALL(dma_regs, logic[9:0], logic[31:0], logic[3:0]) + localparam int unsigned RegAddrWidth = 32'd10; + `REG_BUS_TYPEDEF_ALL(dma_regs, logic[RegAddrWidth-1:0], logic[DATA_WIDTH-1:0], + logic[BE_WIDTH-1:0]) dma_regs_req_t [NumRegs-1:0] dma_regs_req; dma_regs_rsp_t [NumRegs-1:0] dma_regs_rsp; // iDMA struct definitions - localparam int unsigned TFLenWidth = AXI_ADDR_WIDTH; - localparam int unsigned NumDim = 2; // Support 2D midend for 2D transfers - localparam int unsigned RepWidth = 32; - localparam int unsigned StrideWidth = 32; - typedef logic [TFLenWidth-1:0] tf_len_t; - typedef logic [RepWidth-1:0] reps_t; + localparam int unsigned TFLenWidth = AXI_ADDR_WIDTH; + localparam int unsigned NumDim = 32'd3; // Support 2D midend for 2D transfers + localparam int unsigned RepWidth = 32'd32; + localparam int unsigned StrideWidth = 32'd32; + typedef logic [TFLenWidth-1:0] 
tf_len_t; + typedef logic [RepWidth-1:0] reps_t; typedef logic [StrideWidth-1:0] strides_t; + typedef logic [StreamWidth-1:0] stream_width_t; + // iDMA request / response types - `IDMA_TYPEDEF_FULL_REQ_T(idma_req_t, slv_id_t, addr_t, tf_len_t) + `IDMA_TYPEDEF_FULL_REQ_T(idma_req_t, id_t, addr_t, tf_len_t) `IDMA_TYPEDEF_FULL_RSP_T(idma_rsp_t, addr_t) // iDMA ND request `IDMA_TYPEDEF_FULL_ND_REQ_T(idma_nd_req_t, idma_req_t, reps_t, strides_t) - idma_nd_req_t twod_req, twod_req_queue; - idma_req_t burst_req; - idma_rsp_t idma_rsp; + logic [StreamWidth-1:0] stream_idx; - logic fe_valid, twod_queue_valid, be_valid, be_rsp_valid; - logic fe_ready, twod_queue_ready, be_ready, be_rsp_ready; - logic trans_complete, midend_busy; - idma_pkg::idma_busy_t idma_busy; + idma_nd_req_t [NumStreams-1:0] twod_req_queue; + idma_nd_req_t twod_req; + idma_req_t [NumStreams-1:0] idma_req; + idma_rsp_t [NumStreams-1:0] idma_rsp; + + logic one_fe_valid; + logic [NumStreams-1:0] fe_valid, twod_queue_valid, be_valid, be_rsp_valid; + logic [NumStreams-1:0] fe_ready, twod_queue_ready, be_ready, be_rsp_ready; + logic [NumStreams-1:0] trans_complete, midend_busy; + idma_pkg::idma_busy_t [NumStreams-1:0] idma_busy; + + logic [NumStreams-1:0][31:0] done_id, next_id; + + // ------------------------------------------------------ + // CLOCK GATING CONTROL LOGIC + // ------------------------------------------------------ + + // A first level of clock gating is performed at cluster level: + // - the clock for the whole iDMA wrapper can be controlled via sw through the cluster control unit. + // By disabling it, iDMA becomes unresponsive to incoming requests. + // Here, another clock gating level is applied: + // - completely hw-controlled, this clock gating cell controls the datapath clock, disabling it when not needed. 
+ + logic keep_datapath_clocked, datapath_clk_en; + + // Register to keep the clock active until event completion + // Once the transfer has started its execution (busy_o == 1'b1) + // the datapath needs to be clocked until the completion event + // has been received (|trans_complete). Then the datapath + // can be gated again. + + always_ff @(posedge clk_i, negedge rst_ni) begin + if (rst_ni == 1'b0) begin + keep_datapath_clocked <= 1'b0; + end else if (busy_o == 1'b1) begin + keep_datapath_clocked <= 1'b1; + end else if (|trans_complete) begin + keep_datapath_clocked <= 1'b0; + end + end + + assign datapath_clk_en = (one_fe_valid | (|trans_complete) | keep_datapath_clocked); + + // // ---------------------------------------------------------------------------------------------------------- + // // DATAPATH CLOCK GATING CELL --> This gates everything except for the frontend and the periph_to_reg modules + // // ---------------------------------------------------------------------------------------------------------- + + cluster_clock_gating idma_datapath_ckgate ( + .clk_i ( clk_i ), + .en_i ( datapath_clk_en ), + .test_en_i ( test_mode_i ), + .clk_o ( datapath_clk_gated ) + ); // ------------------------------------------------------ // FRONTEND @@ -171,291 +308,895 @@ module dmac_wrap #( for (genvar i = 0; i < NumRegs; i++) begin : gen_core_regs periph_to_reg #( - .AW ( 10 ), - .DW ( 32 ), - .BW ( 8 ), - .IW ( PE_ID_WIDTH ), - .req_t ( dma_regs_req_t ), - .rsp_t ( dma_regs_rsp_t ) + .AW (RegAddrWidth), + .DW (DATA_WIDTH), + .BW (BE_WIDTH<<1), + .IW (PE_ID_WIDTH), + .req_t(dma_regs_req_t), + .rsp_t(dma_regs_rsp_t) ) i_pe_translate ( - .clk_i, + .clk_i ( clk_i ), .rst_ni, - .req_i ( config_req [i] ), - .add_i ( config_add [i][9:0] ), - .wen_i ( config_wen [i] ), - .wdata_i ( config_wdata [i] ), - .be_i ( config_be [i] ), - .id_i ( config_id [i] ), - .gnt_o ( config_gnt [i] ), - .r_rdata_o ( config_r_rdata [i] ), - .r_opc_o ( config_r_opc [i] ), - .r_id_o ( 
config_r_id [i] ), - .r_valid_o ( config_r_valid [i] ), - .reg_req_o ( dma_regs_req [i] ), - .reg_rsp_i ( dma_regs_rsp [i] ) + .req_i (config_req[i]), + .add_i (config_add[i][RegAddrWidth-1:0]), + .wen_i (config_wen[i]), + .wdata_i (config_wdata[i]), + .be_i (config_be[i]), + .id_i (config_id[i]), + .gnt_o (config_gnt[i]), + .r_rdata_o(config_r_rdata[i]), + .r_opc_o (config_r_opc[i]), + .r_id_o (config_r_id[i]), + .r_valid_o(config_r_valid[i]), + .reg_req_o(dma_regs_req[i]), + .reg_rsp_i(dma_regs_rsp[i]) ); end - idma_reg32_2d_frontend #( - .NumRegs ( NumRegs ), - .IdCounterWidth ( 28 ), - .dma_regs_req_t ( dma_regs_req_t ), - .dma_regs_rsp_t ( dma_regs_rsp_t ), - .burst_req_t ( idma_nd_req_t ) - ) i_idma_reg32_2d_frontend ( - .clk_i, + idma_reg32_3d #( + .NumRegs (NumRegs), + .NumStreams (NumStreams), + .IdCounterWidth(32'd32), + .reg_req_t (dma_regs_req_t), + .reg_rsp_t (dma_regs_rsp_t), + .dma_req_t (idma_nd_req_t) + ) i_idma_reg32_3d ( + .clk_i ( clk_i ), .rst_ni, - .dma_ctrl_req_i ( dma_regs_req ), - .dma_ctrl_rsp_o ( dma_regs_rsp ), - .burst_req_o ( twod_req ), - .valid_o ( fe_valid ), - .ready_i ( fe_ready ), - .backend_idle_i ( ~busy_o ), - .trans_complete_i ( trans_complete ) + .dma_ctrl_req_i(dma_regs_req), + .dma_ctrl_rsp_o(dma_regs_rsp), + .dma_req_o (twod_req), + .req_valid_o (one_fe_valid), + .req_ready_i (fe_ready[stream_idx]), + .next_id_i (next_id[stream_idx]), + .stream_idx_o (stream_idx), + .done_id_i (done_id), + .busy_i (idma_busy), + .midend_busy_i (midend_busy) ); + always_comb begin : proc_connect_valids + fe_valid = '0; + fe_valid[stream_idx] = one_fe_valid; + end + // interrupts and events (currently broadcast tx_cplt event only) assign term_event_pe_o = |trans_complete ? '1 : '0; assign term_irq_pe_o = '0; assign term_event_o = |trans_complete ? 
'1 : '0; assign term_irq_o = '0; - assign busy_o = midend_busy | |idma_busy; + assign busy_o = |midend_busy | |idma_busy; - // ------------------------------------------------------ - // MIDEND - // ------------------------------------------------------ + localparam int unsigned ID_WIDTH = 32; - // global (2D) request FIFO - stream_fifo #( - .DEPTH ( GLOBAL_QUEUE_DEPTH ), - .T (idma_nd_req_t ) - ) i_2D_request_fifo ( - .clk_i, - .rst_ni, - .flush_i ( 1'b0 ), - .testmode_i ( test_mode_i ), - .usage_o (/*NOT CONNECTED*/), + for (genvar s = 0; s < NumStreams; s++) begin : gen_streams - .data_i ( twod_req ), - .valid_i ( fe_valid ), - .ready_o ( fe_ready ), + // ------------------------------------------------------ + // ID counters + // ------------------------------------------------------ + idma_transfer_id_gen #( + .IdWidth(ID_WIDTH) + ) i_idma_transfer_id_gen ( + .clk_i ( datapath_clk_gated ), + .rst_ni, + .issue_i (fe_valid[s] & fe_ready[s]), + .retire_i (trans_complete[s]), + .next_o (next_id[s]), + .completed_o(done_id[s]) + ); - .data_o ( twod_req_queue ), - .valid_o ( twod_queue_valid ), - .ready_i ( twod_queue_ready ) - ); - localparam logic [1:0][31:0] RepWidths = '{default: 32'd32}; - - idma_nd_midend #( - .NumDim ( NumDim ), - .addr_t ( addr_t ), - .idma_req_t ( idma_req_t ), - .idma_rsp_t ( idma_rsp_t ), - .idma_nd_req_t( idma_nd_req_t ), - .RepWidths ( RepWidths ) - ) i_idma_2D_midend ( - .clk_i, - .rst_ni, + // ------------------------------------------------------ + // MIDEND + // ------------------------------------------------------ + // global (2D) request FIFO + stream_fifo #( + .DEPTH(GLOBAL_QUEUE_DEPTH), + .T (idma_nd_req_t) + ) i_3D_request_fifo ( + .clk_i ( datapath_clk_gated ), + .rst_ni, + .flush_i (1'b0), + .testmode_i(test_mode_i), + .usage_o ( /*NOT CONNECTED*/), + .data_i (twod_req), + .valid_i (fe_valid[s]), + .ready_o (fe_ready[s]), + .data_o (twod_req_queue[s]), + .valid_o (twod_queue_valid[s]), + .ready_i (twod_queue_ready[s]) + ); + 
+ localparam logic [1:0][31:0] RepWidths = '{default: 32'd32}; - .nd_req_i ( twod_req_queue ), - .nd_req_valid_i ( twod_queue_valid ), - .nd_req_ready_o ( twod_queue_ready ), + idma_nd_midend #( + .NumDim (NumDim), + .addr_t (addr_t), + .idma_req_t (idma_req_t), + .idma_rsp_t (idma_rsp_t), + .idma_nd_req_t(idma_nd_req_t), + .RepWidths (RepWidths) + ) i_idma_3D_midend ( + .clk_i ( datapath_clk_gated ), + .rst_ni, + .nd_req_i (twod_req_queue[s]), + .nd_req_valid_i (twod_queue_valid[s]), + .nd_req_ready_o (twod_queue_ready[s]), + .nd_rsp_o ( /*NOT CONNECTED*/), + .nd_rsp_valid_o (trans_complete[s]), + .nd_rsp_ready_i (1'b1), // Always ready to accept completed transfers + .burst_req_o (idma_req[s]), + .burst_req_valid_o(be_valid[s]), + .burst_req_ready_i(be_ready[s]), + .burst_rsp_i (idma_rsp[s]), + .burst_rsp_valid_i(be_rsp_valid[s]), + .burst_rsp_ready_o(be_rsp_ready[s]), + .busy_o (midend_busy[s]) + ); - .nd_rsp_o (/*NOT CONNECTED*/ ), - .nd_rsp_valid_o ( trans_complete ), - .nd_rsp_ready_i ( 1'b1 ), // Always ready to accept completed transfers + // ------------------------------------------------------ + // BACKEND + // ------------------------------------------------------ - .burst_req_o ( burst_req ), - .burst_req_valid_o( be_valid ), - .burst_req_ready_i( be_ready ), + // even channels: copy out data + if (s[0] == 1'b0) begin : gen_cpy_out - .burst_rsp_i ( idma_rsp ), - .burst_rsp_valid_i( be_rsp_valid ), - .burst_rsp_ready_o( be_rsp_ready ), + // Meta Channel Widths + localparam int unsigned axi_aw_chan_width = axi_pkg::aw_width( + AXI_ADDR_WIDTH, AXI_ID_WIDTH, AXI_USER_WIDTH + ); + localparam int unsigned init_req_chan_width = $bits(init_req_chan_t); + localparam int unsigned obi_a_chan_width = $bits(obi_a_chan_t); + + + typedef struct packed { + init_req_chan_t req_chan; + logic [`MY_MAX(init_req_chan_width, obi_a_chan_width)-init_req_chan_width:0] padding; + } init_read_req_chan_padded_t; + + typedef struct packed { + obi_a_chan_t a_chan; + logic 
[`MY_MAX(init_req_chan_width, obi_a_chan_width)-obi_a_chan_width:0] padding; + } obi_read_a_chan_padded_t; + + typedef union packed { + init_read_req_chan_padded_t init; + obi_read_a_chan_padded_t obi; + } read_meta_channel_t; + + typedef struct packed { + axi_aw_chan_t aw_chan; + logic [`MY_MAX(axi_aw_chan_width, init_req_chan_width)-axi_aw_chan_width:0] padding; + } axi_write_aw_chan_padded_t; + + typedef struct packed { + init_req_chan_t req_chan; + logic [`MY_MAX(axi_aw_chan_width, init_req_chan_width)-init_req_chan_width:0] padding; + } init_write_req_chan_padded_t; + + typedef union packed { + axi_write_aw_chan_padded_t axi; + init_write_req_chan_padded_t init; + } write_meta_channel_t; + + // local buses + init_req_t init_read_req, init_write_req; + init_rsp_t init_read_rsp, init_write_rsp; + + // BINDING between ADD_WIDTH <-> AXI_ADDR_WIDTH + assign obi_read_req_from_dma[s/2].req = obi2axi_read_req_from_dma[s/2].req ; + assign obi_read_req_from_dma[s/2].a.addr = obi2axi_read_req_from_dma[s/2].a.addr ; + assign obi_read_req_from_dma[s/2].a.we = obi2axi_read_req_from_dma[s/2].a.we ; + assign obi_read_req_from_dma[s/2].a.be = obi2axi_read_req_from_dma[s/2].a.be ; + assign obi_read_req_from_dma[s/2].a.wdata = obi2axi_read_req_from_dma[s/2].a.wdata; + assign obi_read_req_from_dma[s/2].rready = obi2axi_read_req_from_dma[s/2].rready ; + + idma_backend_synth_r_obi_rw_init_w_axi #( + .DataWidth ( AXI_DATA_WIDTH ), + .AddrWidth ( AXI_ADDR_WIDTH ), + .UserWidth ( AXI_USER_WIDTH ), + .AxiIdWidth ( AXI_ID_WIDTH ), + .NumAxInFlight ( NB_OUTSND_BURSTS ), + .BufferDepth ( 32'd3 ), + .TFLenWidth ( TFLenWidth ), + .MemSysDepth ( 32'd0 ), + .CombinedShifter ( 1'b0 ), + .RAWCouplingAvail ( 1'b0 ), + .MaskInvalidData ( 1'b0 ), + .HardwareLegalizer ( 1'b1 ), + .RejectZeroTransfers ( 1'b1 ), + .ErrorHandling ( 1'b0 ), + .Burst_len ( IDMA_BURST_LENGTH ) + ) i_idma_backend_r_obi_rw_init_w_axi ( + .clk_i ( datapath_clk_gated ), + .rst_ni ( rst_ni ), + .test_i ( test_mode_i ), + 
.req_valid_i ( be_valid[s] ), + .req_ready_o ( be_ready[s] ), + .req_length_i ( idma_req[s].length ), + .req_src_addr_i ( idma_req[s].src_addr ), + .req_dst_addr_i ( idma_req[s].dst_addr ), + .req_src_protocol_i ( idma_req[s].opt.src_protocol ), + .req_dst_protocol_i ( idma_req[s].opt.dst_protocol ), + .req_axi_id_i ( idma_req[s].opt.axi_id ), + .req_src_burst_i ( idma_req[s].opt.src.burst ), + .req_src_cache_i ( idma_req[s].opt.src.cache ), + .req_src_lock_i ( idma_req[s].opt.src.lock ), + .req_src_prot_i ( idma_req[s].opt.src.prot ), + .req_src_qos_i ( idma_req[s].opt.src.qos ), + .req_src_region_i ( idma_req[s].opt.src.region ), + .req_dst_burst_i ( idma_req[s].opt.dst.burst ), + .req_dst_cache_i ( idma_req[s].opt.dst.cache ), + .req_dst_lock_i ( idma_req[s].opt.dst.lock ), + .req_dst_prot_i ( idma_req[s].opt.dst.prot ), + .req_dst_qos_i ( idma_req[s].opt.dst.qos ), + .req_dst_region_i ( idma_req[s].opt.dst.region ), + .req_decouple_aw_i ( idma_req[s].opt.beo.decouple_aw ), + .req_decouple_rw_i ( idma_req[s].opt.beo.decouple_rw ), + .req_src_max_llen_i ( idma_req[s].opt.beo.src_max_llen ), + .req_dst_max_llen_i ( idma_req[s].opt.beo.dst_max_llen ), + .req_src_reduce_len_i ( idma_req[s].opt.beo.src_reduce_len ), + .req_dst_reduce_len_i ( idma_req[s].opt.beo.dst_reduce_len ), + .req_last_i ( idma_req[s].opt.last ), + .rsp_valid_o ( be_rsp_valid[s] ), + .rsp_ready_i ( be_rsp_ready[s] ), + .rsp_cause_o ( idma_rsp[s].pld.cause ), + .rsp_err_type_o ( idma_rsp[s].pld.err_type ), + .rsp_burst_addr_o ( idma_rsp[s].pld.burst_addr ), + .rsp_error_o ( idma_rsp[s].error ), + .rsp_last_o ( idma_rsp[s].last ), + + .eh_req_valid_i ( '0 ), + .eh_req_ready_o ( /* NOT CONNECTED */ ), + .eh_req_i ( '0 ), + + .init_read_req_valid_o ( init_read_req.req_valid ), + .init_read_req_config_o ( init_read_req.req_chan.cfg ), + .init_read_req_ready_i ( init_read_rsp.req_ready ), + + .init_read_rsp_valid_i ( init_read_rsp.rsp_valid ), + .init_read_rsp_init_i ( init_read_rsp.rsp_chan.init ), + 
.init_read_rsp_ready_o ( init_read_req.rsp_ready ), + + .obi_read_req_a_req_o ( obi2axi_read_req_from_dma[s/2].req ), + .obi_read_req_a_addr_o ( obi2axi_read_req_from_dma[s/2].a.addr ), + .obi_read_req_a_we_o ( obi2axi_read_req_from_dma[s/2].a.we ), + .obi_read_req_a_be_o ( obi2axi_read_req_from_dma[s/2].a.be ), + .obi_read_req_a_wdata_o ( obi2axi_read_req_from_dma[s/2].a.wdata ), + .obi_read_req_r_ready_o ( obi2axi_read_req_from_dma[s/2].rready ), + + .obi_read_rsp_a_gnt_i ( obi_read_rsp_to_dma[s/2].gnt ), + .obi_read_rsp_r_valid_i ( obi_read_rsp_to_dma[s/2].rvalid ), + .obi_read_rsp_r_rdata_i ( obi_read_rsp_to_dma[s/2].r.rdata ), + .obi_read_rsp_r_rid_i ( obi_read_rsp_to_dma[s/2].r.rid ), + .obi_read_rsp_r_err_i ( obi_read_rsp_to_dma[s/2].r.err ), + + .axi_aw_id_o ( dma_req[s].aw.id ), + .axi_aw_addr_o ( dma_req[s].aw.addr ), + .axi_aw_len_o ( dma_req[s].aw.len ), + .axi_aw_size_o ( dma_req[s].aw.size ), + .axi_aw_burst_o ( dma_req[s].aw.burst ), + .axi_aw_lock_o ( dma_req[s].aw.lock ), + .axi_aw_cache_o ( dma_req[s].aw.cache ), + .axi_aw_prot_o ( dma_req[s].aw.prot ), + .axi_aw_qos_o ( dma_req[s].aw.qos ), + .axi_aw_region_o ( dma_req[s].aw.region ), + .axi_aw_atop_o ( dma_req[s].aw.atop ), + .axi_aw_user_o ( dma_req[s].aw.user ), + .axi_aw_valid_o ( dma_req[s].aw_valid ), + .axi_w_data_o ( dma_req[s].w.data ), + .axi_w_strb_o ( dma_req[s].w.strb ), + .axi_w_last_o ( dma_req[s].w.last ), + .axi_w_user_o ( dma_req[s].w.user ), + .axi_w_valid_o ( dma_req[s].w_valid ), + .axi_b_ready_o ( dma_req[s].b_ready ), + + .axi_aw_ready_i ( dma_rsp[s].aw_ready ), + .axi_w_ready_i ( dma_rsp[s].w_ready ), + .axi_b_id_i ( dma_rsp[s].b.id ), + .axi_b_resp_i ( dma_rsp[s].b.resp ), + .axi_b_user_i ( dma_rsp[s].b.user ), + .axi_b_valid_i ( dma_rsp[s].b_valid ), + + .init_write_req_valid_o ( init_write_req.req_valid ), + .init_write_req_cfg_o ( init_write_req.req_chan.cfg ), + .init_write_req_term_o ( init_write_req.req_chan.term ), + .init_write_req_strb_o ( 
init_write_req.req_chan.strb ), + .init_write_req_id_o ( init_write_req.req_chan.id ), + .init_write_req_ready_i ( init_write_rsp.req_ready ), + + .init_write_rsp_valid_i ( init_write_rsp.rsp_valid ), + .init_write_rsp_ready_o ( init_write_req.rsp_ready ), + .idma_busy_o ( idma_busy[s] ) + ); + + // use a spill register to only give responses when a request was + // (or is) asserted + spill_register #( + .T(logic) + ) i_init_read_rsp_reflect ( + .clk_i ( datapath_clk_gated ), + .rst_ni, + .valid_i(init_read_req.req_valid), + .ready_o(init_read_rsp.req_ready), + .data_i('0), // not used + .valid_o(init_read_rsp.rsp_valid), + .ready_i(init_read_req.rsp_ready), + .data_o() + ); + + //implement zero memory using init protocol + assign init_read_rsp.rsp_chan.init = '0; + // implement /dev/null + spill_register #( + .T(logic) + ) i_init_write_rsp_reflect ( + .clk_i ( datapath_clk_gated ), + .rst_ni, + .valid_i(init_write_req.req_valid), + .ready_o(init_write_rsp.req_ready), + .data_i('0), // not used + .valid_o(init_write_rsp.rsp_valid), + .ready_i(init_write_req.rsp_ready), + .data_o() + ); + + assign init_write_rsp.rsp_chan.init = '0; + + // odd channels: copy in data + end else begin : gen_cpy_in + + // Meta Channel Widths + localparam int unsigned axi_ar_chan_width = axi_pkg::ar_width( + AXI_ADDR_WIDTH, AXI_ID_WIDTH, AXI_USER_WIDTH + ); + localparam int unsigned init_req_chan_width = $bits(init_req_chan_t); + localparam int unsigned obi_a_chan_width = $bits(obi_a_chan_t); + + function int unsigned max_width(input int unsigned a, b); + return (a > b) ? 
a : b; + endfunction + + typedef struct packed { + axi_ar_chan_t ar_chan; + logic [ + `MY_MAX( + axi_ar_chan_width, `MY_MAX(init_req_chan_width, obi_a_chan_width) + ) + -axi_ar_chan_width:0] padding; + } axi_read_ar_chan_padded_t; + + typedef struct packed { + init_req_chan_t req_chan; + logic [ + `MY_MAX(axi_ar_chan_width, `MY_MAX(init_req_chan_width, obi_a_chan_width)) + -init_req_chan_width:0] padding; + } init_read_req_chan_padded_t; + + typedef struct packed { + obi_a_chan_t a_chan; + logic [ + `MY_MAX(axi_ar_chan_width, `MY_MAX(init_req_chan_width, obi_a_chan_width)) + -obi_a_chan_width:0] padding; + } obi_read_a_chan_padded_t; + + typedef union packed { + axi_read_ar_chan_padded_t axi; + init_read_req_chan_padded_t init; + obi_read_a_chan_padded_t obi; + } read_meta_channel_t; + + typedef struct packed { + init_req_chan_t req_chan; + logic [`MY_MAX(init_req_chan_width, obi_a_chan_width)-init_req_chan_width:0] padding; + } init_write_req_chan_padded_t; + + typedef struct packed { + obi_a_chan_t a_chan; + logic [`MY_MAX(init_req_chan_width, obi_a_chan_width)-obi_a_chan_width:0] padding; + } obi_write_a_chan_padded_t; + + typedef union packed { + init_write_req_chan_padded_t init; + obi_write_a_chan_padded_t obi; + } write_meta_channel_t; + + // local buses + init_req_t init_read_req, init_write_req; + init_rsp_t init_read_rsp, init_write_rsp; + + // BINDING between ADD_WIDTH <-> AXI_ADDR_WIDTH + assign obi_reorg_req_from_dma[s/2].req = obi2axi_reorg_req_from_dma[s/2].req ; + assign obi_reorg_req_from_dma[s/2].a.addr = obi2axi_reorg_req_from_dma[s/2].a.addr ; + assign obi_reorg_req_from_dma[s/2].a.we = obi2axi_reorg_req_from_dma[s/2].a.we ; + assign obi_reorg_req_from_dma[s/2].a.be = obi2axi_reorg_req_from_dma[s/2].a.be ; + assign obi_reorg_req_from_dma[s/2].a.wdata = obi2axi_reorg_req_from_dma[s/2].a.wdata; + assign obi_reorg_req_from_dma[s/2].rready = obi2axi_reorg_req_from_dma[s/2].rready ; + + assign obi_write_req_from_dma[s/2].req = 
obi2axi_write_req_from_dma[s/2].req ; + assign obi_write_req_from_dma[s/2].a.addr = obi2axi_write_req_from_dma[s/2].a.addr ; + assign obi_write_req_from_dma[s/2].a.we = obi2axi_write_req_from_dma[s/2].a.we ; + assign obi_write_req_from_dma[s/2].a.be = obi2axi_write_req_from_dma[s/2].a.be ; + assign obi_write_req_from_dma[s/2].a.wdata = obi2axi_write_req_from_dma[s/2].a.wdata; + assign obi_write_req_from_dma[s/2].rready = obi2axi_write_req_from_dma[s/2].rready ; + + idma_backend_synth_r_axi_rw_init_rw_obi #( + .DataWidth (AXI_DATA_WIDTH), + .AddrWidth (AXI_ADDR_WIDTH), + .UserWidth (AXI_USER_WIDTH), + .AxiIdWidth (AXI_ID_WIDTH), + .NumAxInFlight (NB_OUTSND_BURSTS), + .BufferDepth (32'd3), + .TFLenWidth (TFLenWidth), + .MemSysDepth (32'd0), + .CombinedShifter (1'b0), + .RAWCouplingAvail (1'b0), + .MaskInvalidData (1'b0), + .HardwareLegalizer (1'b1), + .RejectZeroTransfers (1'b1), + .ErrorHandling (1'b0) + ) i_idma_backend_r_axi_rw_init_rw_obi ( + .clk_i ( datapath_clk_gated ), + .rst_ni ( rst_ni ), + .test_i ( test_mode_i ), + .req_valid_i ( be_valid[s] ), + .req_ready_o ( be_ready[s] ), + .req_length_i ( idma_req[s].length ), + .req_src_addr_i ( idma_req[s].src_addr ), + .req_dst_addr_i ( idma_req[s].dst_addr ), + .req_src_protocol_i ( idma_req[s].opt.src_protocol ), + .req_dst_protocol_i ( idma_req[s].opt.dst_protocol ), + .req_axi_id_i ( idma_req[s].opt.axi_id ), + .req_src_burst_i ( idma_req[s].opt.src.burst ), + .req_src_cache_i ( idma_req[s].opt.src.cache ), + .req_src_lock_i ( idma_req[s].opt.src.lock ), + .req_src_prot_i ( idma_req[s].opt.src.prot ), + .req_src_qos_i ( idma_req[s].opt.src.qos ), + .req_src_region_i ( idma_req[s].opt.src.region ), + .req_dst_burst_i ( idma_req[s].opt.dst.burst ), + .req_dst_cache_i ( idma_req[s].opt.dst.cache ), + .req_dst_lock_i ( idma_req[s].opt.dst.lock ), + .req_dst_prot_i ( idma_req[s].opt.dst.prot ), + .req_dst_qos_i ( idma_req[s].opt.dst.qos ), + .req_dst_region_i ( idma_req[s].opt.dst.region ), + .req_decouple_aw_i ( 
idma_req[s].opt.beo.decouple_aw ), + .req_decouple_rw_i ( idma_req[s].opt.beo.decouple_rw ), + .req_src_max_llen_i ( idma_req[s].opt.beo.src_max_llen ), + .req_dst_max_llen_i ( idma_req[s].opt.beo.dst_max_llen ), + .req_src_reduce_len_i ( idma_req[s].opt.beo.src_reduce_len ), + .req_dst_reduce_len_i ( idma_req[s].opt.beo.dst_reduce_len ), + .req_last_i ( idma_req[s].opt.last ), + .rsp_valid_o ( be_rsp_valid[s] ), + .rsp_ready_i ( be_rsp_ready[s] ), + .rsp_cause_o ( idma_rsp[s].pld.cause ), + .rsp_err_type_o ( idma_rsp[s].pld.err_type ), + .rsp_burst_addr_o ( idma_rsp[s].pld.burst_addr ), + .rsp_error_o ( idma_rsp[s].error ), + .rsp_last_o ( idma_rsp[s].last ), + + .eh_req_valid_i ( '0 ), + .eh_req_ready_o ( /* NOT CONNECTED */ ), + .eh_req_i ( '0 ), + + .axi_ar_id_o ( dma_req[s].ar.id ), + .axi_ar_addr_o ( dma_req[s].ar.addr ), + .axi_ar_len_o ( dma_req[s].ar.len ), + .axi_ar_size_o ( dma_req[s].ar.size ), + .axi_ar_burst_o ( dma_req[s].ar.burst ), + .axi_ar_lock_o ( dma_req[s].ar.lock ), + .axi_ar_cache_o ( dma_req[s].ar.cache ), + .axi_ar_prot_o ( dma_req[s].ar.prot ), + .axi_ar_qos_o ( dma_req[s].ar.qos ), + .axi_ar_region_o ( dma_req[s].ar.region ), + .axi_ar_user_o ( dma_req[s].ar.user ), + .axi_ar_valid_o ( dma_req[s].ar_valid ), + .axi_r_ready_o ( dma_req[s].r_ready ), + + .axi_ar_ready_i ( dma_rsp[s].ar_ready ), + .axi_r_id_i ( dma_rsp[s].r.id ), + .axi_r_data_i ( dma_rsp[s].r.data ), + .axi_r_resp_i ( dma_rsp[s].r.resp ), + .axi_r_last_i ( dma_rsp[s].r.last ), + .axi_r_user_i ( dma_rsp[s].r.user ), + .axi_r_valid_i ( dma_rsp[s].r_valid ), + + .init_read_req_valid_o ( init_read_req.req_valid ), + .init_read_req_config_o ( init_read_req.req_chan.cfg ), + .init_read_req_ready_i ( init_read_rsp.req_ready ), + + .init_read_rsp_valid_i (init_read_rsp.rsp_valid ), + .init_read_rsp_init_i ( init_read_rsp.rsp_chan.init ), + .init_read_rsp_ready_o ( init_read_req.rsp_ready ), + + .obi_read_req_a_req_o ( obi2axi_reorg_req_from_dma[s/2].req ), + .obi_read_req_a_addr_o 
( obi2axi_reorg_req_from_dma[s/2].a.addr ), + .obi_read_req_a_we_o ( obi2axi_reorg_req_from_dma[s/2].a.we ), + .obi_read_req_a_be_o ( obi2axi_reorg_req_from_dma[s/2].a.be ), + .obi_read_req_a_wdata_o ( obi2axi_reorg_req_from_dma[s/2].a.wdata ), + .obi_read_req_r_ready_o ( obi2axi_reorg_req_from_dma[s/2].rready ), + + .obi_read_rsp_a_gnt_i ( obi_reorg_rsp_to_dma[s/2].gnt ), + .obi_read_rsp_r_valid_i ( obi_reorg_rsp_to_dma[s/2].rvalid ), + .obi_read_rsp_r_rdata_i ( obi_reorg_rsp_to_dma[s/2].r.rdata ), + .obi_read_rsp_r_rid_i ( obi_reorg_rsp_to_dma[s/2].r.rid ), + .obi_read_rsp_r_err_i ( obi_reorg_rsp_to_dma[s/2].r.err ), + + .init_write_req_valid_o ( init_write_req.req_valid ), + .init_write_req_cfg_o ( init_write_req.req_chan.cfg ), + .init_write_req_term_o ( init_write_req.req_chan.term ), + .init_write_req_strb_o ( init_write_req.req_chan.strb ), + .init_write_req_id_o ( init_write_req.req_chan.id ), + .init_write_req_ready_i (init_write_rsp.req_ready ), + + .init_write_rsp_valid_i ( init_write_rsp.rsp_valid ), + .init_write_rsp_ready_o (init_write_req.rsp_ready ), + + .obi_write_req_a_req_o ( obi2axi_write_req_from_dma[s/2].req ), + .obi_write_req_a_addr_o ( obi2axi_write_req_from_dma[s/2].a.addr ), + .obi_write_req_a_we_o ( obi2axi_write_req_from_dma[s/2].a.we ), + .obi_write_req_a_be_o ( obi2axi_write_req_from_dma[s/2].a.be ), + .obi_write_req_a_wdata_o ( obi2axi_write_req_from_dma[s/2].a.wdata ), + .obi_write_req_a_aid_o ( obi2axi_write_req_from_dma[s/2].a.aid ), + .obi_write_req_r_ready_o ( obi2axi_write_req_from_dma[s/2].rready ), + + .obi_write_rsp_a_gnt_i ( obi_write_rsp_to_dma[s/2].gnt ), + .obi_write_rsp_r_valid_i ( obi_write_rsp_to_dma[s/2].rvalid ), + .obi_write_rsp_r_rdata_i ( obi_write_rsp_to_dma[s/2].r.rdata ), + + .idma_busy_o ( idma_busy[s] ) + ); + + // use a spill register to only give responses when a request was + // (or is) asserted + spill_register #( + .T(logic) + ) i_init_read_rsp_reflect ( + .clk_i ( datapath_clk_gated ), + .rst_ni, + 
.valid_i(init_read_req.req_valid), + .ready_o(init_read_rsp.req_ready), + .data_i('0), // not used + .valid_o(init_read_rsp.rsp_valid), + .ready_i(init_read_req.rsp_ready), + .data_o() + ); + //implement zero memory using init protocol + assign init_read_rsp.rsp_chan.init = '0; + // implement /dev/null + spill_register #( + .T(logic) + ) i_init_write_rsp_reflect ( + .clk_i ( datapath_clk_gated ), + .rst_ni, + .valid_i(init_write_req.req_valid), + .ready_o(init_write_rsp.req_ready), + .data_i('0), // not used + .valid_o(init_write_rsp.rsp_valid), + .ready_i(init_write_req.rsp_ready), + .data_o() + ); + assign init_write_rsp.rsp_chan.init = '0; + end : gen_cpy_in + end : gen_streams - .busy_o ( midend_busy ) - ); // ------------------------------------------------------ - // BACKEND + // MUX read OBI connections if specified // ------------------------------------------------------ + for (genvar s = 0; s < NUM_BIDIR_STREAMS; s++) begin + if (MUX_READ) begin + localparam obi_pkg::obi_cfg_t sbr_obi_cfg = '{ + UseRReady: 1'b1, + CombGnt: 1'b0, + AddrWidth: ADDR_WIDTH, + DataWidth: AXI_DATA_WIDTH, + IdWidth: 1, + Integrity: 1'b0, + BeFull: 1'b1, + OptionalCfg: obi_pkg::ObiMinimalOptionalConfig + }; - idma_backend #( - .DataWidth ( AXI_DATA_WIDTH ), - .AddrWidth ( AXI_ADDR_WIDTH ), - .UserWidth ( AXI_USER_WIDTH ), - .AxiIdWidth ( AXI_ID_WIDTH ), - .NumAxInFlight ( NB_OUTSND_BURSTS ), - .BufferDepth ( 3 ), - .TFLenWidth ( TFLenWidth ), - .RAWCouplingAvail ( 1'b1 ), - .MemSysDepth ( 32'd0 ), - .MaskInvalidData ( 1'b1 ), - .HardwareLegalizer ( 1'b1 ), - .RejectZeroTransfers ( 1'b1 ), - .ErrorCap ( idma_pkg::NO_ERROR_HANDLING ), - .idma_req_t ( idma_req_t ), - .idma_rsp_t ( idma_rsp_t ), - .idma_eh_req_t ( idma_pkg::idma_eh_req_t ), - .idma_busy_t ( idma_pkg::idma_busy_t ), - .protocol_req_t ( slv_req_t ), - .protocol_rsp_t ( slv_resp_t ), - .aw_chan_t ( slv_aw_chan_t ), - .ar_chan_t ( slv_ar_chan_t ) - ) i_idma_backend ( - .clk_i, - .rst_ni, - .testmode_i ( test_mode_i ), + 
// iDMA OBI - .idma_req_i ( burst_req ), - .req_valid_i ( be_valid ), - .req_ready_o ( be_ready ), + obi_mux #( + .SbrPortObiCfg ( sbr_obi_cfg ), + .MgrPortObiCfg ( sbr_obi_cfg ), + .sbr_port_obi_req_t ( obi_req_t ), + .sbr_port_a_chan_t ( obi_a_chan_t ), + .sbr_port_obi_rsp_t ( obi_rsp_t ), + .sbr_port_r_chan_t ( obi_r_chan_t ), + .mgr_port_obi_req_t ( obi_req_t ), + .mgr_port_obi_rsp_t ( obi_rsp_t ), + .NumSbrPorts ( 2 ), + .NumMaxTrans ( 2 ), + .UseIdForRouting ( 1'b0 ) + ) obi_read_mux_i ( + .clk_i ( datapath_clk_gated ), + .rst_ni, + .testmode_i (test_mode_i), + .sbr_ports_req_i({obi_reorg_req_from_dma[s], obi_read_req_from_dma[s]}), + .sbr_ports_rsp_o({obi_reorg_rsp_to_dma[s], obi_read_rsp_to_dma[s]}), + .mgr_port_req_o (obi_read_req_muxed[s]), + .mgr_port_rsp_i (obi_read_rsp_to_mux[s]) + ); + assign obi_reorg_req_from_rrc[s] = '0; + assign obi_reorg_rsp_to_rrc[s] = '0; + end else begin // if (MUX_READ) + // pass through the read req/rsp from/to dma + assign obi_read_req_muxed[s] = obi_read_req_from_dma[s]; + assign obi_read_rsp_to_dma[s] = obi_read_rsp_to_mux[s]; - .idma_rsp_o ( idma_rsp ), - .rsp_valid_o ( be_rsp_valid ), - .rsp_ready_i ( be_rsp_ready ), + obi_rready_converter #( + .obi_a_chan_t(obi_a_chan_t), + .obi_r_chan_t(obi_r_chan_t), + .Depth(1) + ) obi_rready_converter_reorg_i ( + .clk_i ( datapath_clk_gated ), + .rst_ni, + .test_mode_i, + .sbr_a_chan_i ( obi_reorg_req_from_dma[s].a ), + .req_i ( obi_reorg_req_from_dma[s].req ), + .gnt_o ( obi_reorg_rsp_to_dma[s].gnt ), + .rready_i ( obi_reorg_req_from_dma[s].rready ), + .sbr_r_chan_o ( obi_reorg_rsp_to_dma[s].r ), + .rvalid_o ( obi_reorg_rsp_to_dma[s].rvalid ), + .mgr_a_chan_o ( obi_reorg_req_from_rrc[s].a ), + .req_o ( obi_reorg_req_from_rrc[s].req ), + .mgr_r_chan_i ( obi_reorg_rsp_to_rrc[s].r ), + .gnt_i ( obi_reorg_rsp_to_rrc[s].gnt ), + .rvalid_i ( obi_reorg_rsp_to_rrc[s].rvalid ) + ); + // We are always ready for responses, because we don't + // send more requests than we can absorb in the 
fifo + assign obi_reorg_req_from_rrc[s].rready = 1'b1; + end // else: !if(MUX_READ) - .idma_eh_req_i ( '0 ), // No error handling - .eh_req_valid_i ( 1'b1 ), - .eh_req_ready_o (/*NOT CONNECTED*/), + obi_rready_converter #( + .obi_a_chan_t(obi_a_chan_t), + .obi_r_chan_t(obi_r_chan_t), + .Depth(1) + ) obi_rready_converter_read_i ( + .clk_i ( datapath_clk_gated ), + .rst_ni, + .test_mode_i, + .sbr_a_chan_i ( obi_read_req_muxed[s].a ), + .req_i ( obi_read_req_muxed[s].req ), + .gnt_o ( obi_read_rsp_to_mux[s].gnt ), + .rready_i ( obi_read_req_muxed[s].rready ), + .sbr_r_chan_o ( obi_read_rsp_to_mux[s].r ), + .rvalid_o ( obi_read_rsp_to_mux[s].rvalid ), + .mgr_a_chan_o ( obi_read_req_from_rrc[s].a ), + .req_o ( obi_read_req_from_rrc[s].req ), + .mgr_r_chan_i ( obi_read_rsp_to_rrc[s].r ), + .gnt_i ( obi_read_rsp_to_rrc[s].gnt ), + .rvalid_i ( obi_read_rsp_to_rrc[s].rvalid ) + ); + // We are always ready for responses, because we don't + // send more requests than we can absorb in the fifo + assign obi_read_req_from_rrc[s].rready = 1'b1; + + + obi_rready_converter #( + .obi_a_chan_t(obi_a_chan_t), + .obi_r_chan_t(obi_r_chan_t), + .Depth(1) + ) obi_rready_converter_wr_i ( + .clk_i ( datapath_clk_gated ), + .rst_ni, + .test_mode_i, + .sbr_a_chan_i ( obi_write_req_from_dma[s].a ), + .req_i ( obi_write_req_from_dma[s].req ), + .gnt_o ( obi_write_rsp_to_dma[s].gnt ), + .rready_i ( obi_write_req_from_dma[s].rready ), + .sbr_r_chan_o ( obi_write_rsp_to_dma[s].r ), + .rvalid_o ( obi_write_rsp_to_dma[s].rvalid ), + .mgr_a_chan_o ( obi_write_req_from_rrc[s].a ), + .req_o ( obi_write_req_from_rrc[s].req ), + .mgr_r_chan_i ( obi_write_rsp_to_rrc[s].r ), + .gnt_i ( obi_write_rsp_to_rrc[s].gnt ), + .rvalid_i ( obi_write_rsp_to_rrc[s].rvalid ) + ); + // Same as above + assign obi_write_req_from_rrc[s].rready = 1'b1; + end - .protocol_req_o ( dma_req ), - .protocol_rsp_i ( dma_rsp ), - .busy_o ( idma_busy ) - ); // ------------------------------------------------------ - // AXI connection 
to EXT/TCDM + // TCDM connections // ------------------------------------------------------ + for (genvar s = 0; s < NUM_BIDIR_STREAMS; s++) begin + if (TCDM_MEM2BANKS) begin : tcdm_mem2banks + // Currently, mem2banks only implemented for AXI_DATA_WIDTH==64 + // TODO: parametrize so it works for arbitrary data widths + initial begin : mem2banks_check_axi_width + if (AXI_DATA_WIDTH != 64) begin + $error("idma_wrap: AXI_DATA_WIDTH must be 64 when TCDM_MEM2BANKS is 1!"); + end + end - // xbar - localparam int unsigned NumRules = 3; - typedef struct packed { - int unsigned idx; - logic [AXI_ADDR_WIDTH-1:0] start_addr; - logic [AXI_ADDR_WIDTH-1:0] end_addr; - } xbar_rule_t; - xbar_rule_t [NumRules-1:0] addr_map; - logic [AXI_ADDR_WIDTH-1:0] cluster_base_addr; - assign cluster_base_addr = ClusterBaseAddr; /* + (cluster_id_i << 22);*/ - assign addr_map = '{ - '{ // SoC low - start_addr: '0, - end_addr: cluster_base_addr, - idx: 0 - }, - '{ // TCDM - start_addr: cluster_base_addr, - end_addr: cluster_base_addr + TCDM_SIZE, - idx: 1 - }, - '{ // SoC high - start_addr: cluster_base_addr + TCDM_SIZE, - end_addr: '1, - idx: 0 - } - }; - localparam int unsigned NumMstPorts = 2; - localparam int unsigned NumSlvPorts = NUM_STREAMS; - - /* verilator lint_off WIDTHCONCAT */ - localparam axi_pkg::xbar_cfg_t XbarCfg = '{ - NoSlvPorts: NumSlvPorts, - NoMstPorts: NumMstPorts, - MaxMstTrans: NB_OUTSND_BURSTS, - MaxSlvTrans: NB_OUTSND_BURSTS, - FallThrough: 1'b0, - LatencyMode: axi_pkg::CUT_ALL_PORTS, - PipelineStages: 0, - AxiIdWidthSlvPorts: SlvIdxWidth, - AxiIdUsedSlvPorts: SlvIdxWidth, - UniqueIds: 1'b0, - AxiAddrWidth: AXI_ADDR_WIDTH, - AxiDataWidth: AXI_DATA_WIDTH, - NoAddrRules: NumRules - }; - /* verilator lint_on WIDTHCONCAT */ - - axi_xbar #( - .Cfg ( XbarCfg ), - .slv_aw_chan_t( slv_aw_chan_t ), - .mst_aw_chan_t( mst_aw_chan_t ), - .w_chan_t ( w_chan_t ), - .slv_b_chan_t ( slv_b_chan_t ), - .mst_b_chan_t ( mst_b_chan_t ), - .slv_ar_chan_t( slv_ar_chan_t ), - .mst_ar_chan_t( 
mst_ar_chan_t ), - .slv_r_chan_t ( slv_r_chan_t ), - .mst_r_chan_t ( mst_r_chan_t ), - .slv_req_t ( slv_req_t ), - .slv_resp_t ( slv_resp_t ), - .mst_req_t ( mst_req_t ), - .mst_resp_t ( mst_resp_t ), - .rule_t ( xbar_rule_t ) - ) i_dma_axi_xbar ( - .clk_i ( clk_i ), - .rst_ni ( rst_ni ), - .test_i ( test_mode_i ), - .slv_ports_req_i ( dma_req ), - .slv_ports_resp_o ( dma_rsp ), - .mst_ports_req_o ( { tcdm_req, soc_req } ), - .mst_ports_resp_i ( { tcdm_rsp, soc_rsp } ), - .addr_map_i ( addr_map ), - .en_default_mst_port_i ( '0 ), - .default_mst_port_i ( '0 ) - ); + logic tcdm_master_we_0; + logic tcdm_master_we_1; + logic tcdm_master_we_2; + logic tcdm_master_we_3; + logic tcdm_master_we_4; + logic tcdm_master_we_5; - localparam int unsigned TcdmFifoDepth = 1; - `AXI_ASSIGN_REQ_STRUCT(tcdm_mem_req, tcdm_req) - - axi_to_mem_split #( - .axi_req_t ( mem_req_t ), - .axi_resp_t ( mst_resp_t ), - .AddrWidth ( ADDR_WIDTH ), - .AxiDataWidth ( AXI_DATA_WIDTH ), - .IdWidth ( MstIdxWidth ), - .MemDataWidth ( DATA_WIDTH ), - .BufDepth ( TcdmFifoDepth ), - .HideStrb ( 1'b1 ) - ) i_axi_to_mem ( - .clk_i, - .rst_ni, - .busy_o (), - .axi_req_i ( tcdm_mem_req ), - .axi_resp_o ( tcdm_rsp ), - .mem_req_o ( { tcdm_master[0].req, tcdm_master[1].req, - tcdm_master[2].req, tcdm_master[3].req } ), - .mem_gnt_i ( { tcdm_master[0].gnt, tcdm_master[1].gnt, - tcdm_master[2].gnt, tcdm_master[3].gnt } ), - .mem_addr_o ( { tcdm_master[0].add, tcdm_master[1].add, - tcdm_master[2].add, tcdm_master[3].add } ), - .mem_wdata_o ( { tcdm_master[0].data, tcdm_master[1].data, - tcdm_master[2].data, tcdm_master[3].data } ), - .mem_strb_o ( { tcdm_master[0].be, tcdm_master[1].be, - tcdm_master[2].be, tcdm_master[3].be } ), - .mem_atop_o ( ), - .mem_we_o ( { tcdm_master_we_0, tcdm_master_we_1, - tcdm_master_we_2, tcdm_master_we_3 } ), - .mem_rvalid_i ( { tcdm_master[0].r_valid, tcdm_master[1].r_valid, - tcdm_master[2].r_valid, tcdm_master[3].r_valid } ), - .mem_rdata_i ( { tcdm_master[0].r_data, 
tcdm_master[1].r_data, - tcdm_master[2].r_data, tcdm_master[3].r_data } ) - ); + mem_to_banks #( + .AddrWidth(ADDR_WIDTH), + .DataWidth(AXI_DATA_WIDTH), + .NumBanks (32'd2), + .HideStrb (1'b1), + .MaxTrans (32'd1), + .FifoDepth(32'd1) + ) i_mem_to_banks_write ( + .clk_i ( datapath_clk_gated ), + .rst_ni, + .req_i ( obi_write_req_from_rrc[s].req ), + .gnt_o ( obi_write_rsp_to_rrc[s].gnt ), + .addr_i ( obi_write_req_from_rrc[s].a.addr ), + .wdata_i ( obi_write_req_from_rrc[s].a.wdata ), + .strb_i ( obi_write_req_from_rrc[s].a.be ), + .atop_i ( '0 ), + .we_i ( obi_write_req_from_rrc[s].a.we ), + .rvalid_o ( obi_write_rsp_to_rrc[s].rvalid ), + .rdata_o ( obi_write_rsp_to_rrc[s].r.rdata ), + .bank_req_o ( {tcdm_master[NB_TCDM_PORTS_PER_STRM*s+1].req, tcdm_master[NB_TCDM_PORTS_PER_STRM*s].req} ), + .bank_gnt_i ( {tcdm_master[NB_TCDM_PORTS_PER_STRM*s+1].gnt, tcdm_master[NB_TCDM_PORTS_PER_STRM*s].gnt} ), + .bank_addr_o ( {tcdm_master[NB_TCDM_PORTS_PER_STRM*s+1].add, tcdm_master[NB_TCDM_PORTS_PER_STRM*s].add} ), + .bank_wdata_o ( {tcdm_master[NB_TCDM_PORTS_PER_STRM*s+1].data, tcdm_master[NB_TCDM_PORTS_PER_STRM*s].data} ), + .bank_strb_o ( {tcdm_master[NB_TCDM_PORTS_PER_STRM*s+1].be, tcdm_master[NB_TCDM_PORTS_PER_STRM*s].be} ), + .bank_atop_o ( /* NOT CONNECTED */ ), + .bank_we_o ( {tcdm_master_we_1, tcdm_master_we_0} ), + .bank_rvalid_i ( {tcdm_master[NB_TCDM_PORTS_PER_STRM*s+1].r_valid, tcdm_master[NB_TCDM_PORTS_PER_STRM*s].r_valid} ), + .bank_rdata_i ( {tcdm_master[NB_TCDM_PORTS_PER_STRM*s+1].r_data, tcdm_master[NB_TCDM_PORTS_PER_STRM*s].r_data} ) + ); - // flip we polarity - assign tcdm_master[0].wen = !tcdm_master_we_0; - assign tcdm_master[1].wen = !tcdm_master_we_1; - assign tcdm_master[2].wen = !tcdm_master_we_2; - assign tcdm_master[3].wen = !tcdm_master_we_3; - - for (genvar ii=0; ii<4; ii++) begin : gen_tie_unused_tcdm_master - assign tcdm_master[ii].user = '0; - assign tcdm_master[ii].ecc = '0; - assign tcdm_master[ii].id = '0; - assign tcdm_master[ii].ereq = '0; 
- assign tcdm_master[ii].r_eready = '1; - end + assign tcdm_master[NB_TCDM_PORTS_PER_STRM*s+0].user = '0; + assign tcdm_master[NB_TCDM_PORTS_PER_STRM*s+1].user = '0; + assign tcdm_master[NB_TCDM_PORTS_PER_STRM*s+0].wen = !tcdm_master_we_0; + assign tcdm_master[NB_TCDM_PORTS_PER_STRM*s+1].wen = !tcdm_master_we_1; + assign tcdm_master[NB_TCDM_PORTS_PER_STRM*s+0].r_ready = 1'b1; + assign tcdm_master[NB_TCDM_PORTS_PER_STRM*s+1].r_ready = 1'b1; + assign tcdm_master[NB_TCDM_PORTS_PER_STRM*s+0].id = '0; // TODO change? + assign tcdm_master[NB_TCDM_PORTS_PER_STRM*s+1].id = '0; + assign tcdm_master[NB_TCDM_PORTS_PER_STRM*s+0].ecc = '0; + assign tcdm_master[NB_TCDM_PORTS_PER_STRM*s+1].ecc = '0; + + mem_to_banks #( + .AddrWidth(ADDR_WIDTH), + .DataWidth(AXI_DATA_WIDTH), + .NumBanks (32'd2), + .HideStrb (1'b1), + .MaxTrans (32'd1), + .FifoDepth(32'd1) + ) i_mem_to_banks_read ( + .clk_i ( datapath_clk_gated ), + .rst_ni, + .req_i ( obi_read_req_from_rrc[s].req ), + .gnt_o ( obi_read_rsp_to_rrc[s].gnt ), + .addr_i ( obi_read_req_from_rrc[s].a.addr ), + .wdata_i ( obi_read_req_from_rrc[s].a.wdata ), + .strb_i ( obi_read_req_from_rrc[s].a.be ), + .atop_i ( '0 ), + .we_i ( obi_read_req_from_rrc[s].a.we ), + .rvalid_o ( obi_read_rsp_to_rrc[s].rvalid ), + .rdata_o ( obi_read_rsp_to_rrc[s].r.rdata ), + .bank_req_o ( {tcdm_master[NB_TCDM_PORTS_PER_STRM*s+3].req, tcdm_master[NB_TCDM_PORTS_PER_STRM*s+2].req } ), + .bank_gnt_i ( {tcdm_master[NB_TCDM_PORTS_PER_STRM*s+3].gnt, tcdm_master[NB_TCDM_PORTS_PER_STRM*s+2].gnt } ), + .bank_addr_o ( {tcdm_master[NB_TCDM_PORTS_PER_STRM*s+3].add, tcdm_master[NB_TCDM_PORTS_PER_STRM*s+2].add } ), + .bank_wdata_o ( {tcdm_master[NB_TCDM_PORTS_PER_STRM*s+3].data, tcdm_master[NB_TCDM_PORTS_PER_STRM*s+2].data } ), + .bank_strb_o ( {tcdm_master[NB_TCDM_PORTS_PER_STRM*s+3].be, tcdm_master[NB_TCDM_PORTS_PER_STRM*s+2].be } ), + .bank_atop_o ( /* NOT CONNECTED */ ), + .bank_we_o ( {tcdm_master_we_3, tcdm_master_we_2} ), + .bank_rvalid_i ( 
{tcdm_master[NB_TCDM_PORTS_PER_STRM*s+3].r_valid, tcdm_master[NB_TCDM_PORTS_PER_STRM*s+2].r_valid } ), + .bank_rdata_i ( {tcdm_master[NB_TCDM_PORTS_PER_STRM*s+3].r_data, tcdm_master[NB_TCDM_PORTS_PER_STRM*s+2].r_data } ) + ); + + + //assign tcdm_master[NB_TCDM_PORTS_PER_STRM*s+2].boffs = '0; + //assign tcdm_master[NB_TCDM_PORTS_PER_STRM*s+2].lrdy = '0; + assign tcdm_master[NB_TCDM_PORTS_PER_STRM*s+2].user = '0; + //assign tcdm_master[NB_TCDM_PORTS_PER_STRM*s+3].boffs = '0; + //assign tcdm_master[NB_TCDM_PORTS_PER_STRM*s+3].lrdy = '0; + assign tcdm_master[NB_TCDM_PORTS_PER_STRM*s+3].user = '0; + assign tcdm_master[NB_TCDM_PORTS_PER_STRM*s+2].wen = !tcdm_master_we_2; + assign tcdm_master[NB_TCDM_PORTS_PER_STRM*s+3].wen = !tcdm_master_we_3; + assign tcdm_master[NB_TCDM_PORTS_PER_STRM*s+2].r_ready = 1'b1; + assign tcdm_master[NB_TCDM_PORTS_PER_STRM*s+3].r_ready = 1'b1; + assign tcdm_master[NB_TCDM_PORTS_PER_STRM*s+2].id = '0; // TODO change? + assign tcdm_master[NB_TCDM_PORTS_PER_STRM*s+3].id = '0; + assign tcdm_master[NB_TCDM_PORTS_PER_STRM*s+2].ecc = '0; + assign tcdm_master[NB_TCDM_PORTS_PER_STRM*s+3].ecc = '0; + + + if (!MUX_READ) begin // if we don't mux the read, we have 6*NUM_BIDIR_STREAMS interfaces and the reorg + // interface goes straight to TCDM masters 5 and 4. 
+ mem_to_banks #( + .AddrWidth(ADDR_WIDTH), + .DataWidth(AXI_DATA_WIDTH), + .NumBanks (32'd2), + .HideStrb (1'b1), + .MaxTrans (32'd1), + .FifoDepth(32'd1) + ) i_mem_to_banks_reorg ( + .clk_i ( datapath_clk_gated ), + .rst_ni, + .req_i ( obi_reorg_req_from_rrc[s].req ), + .gnt_o ( obi_reorg_rsp_to_rrc[s].gnt ), + .addr_i ( obi_reorg_req_from_rrc[s].a.addr ), + .wdata_i ( obi_reorg_req_from_rrc[s].a.wdata ), + .strb_i ( obi_reorg_req_from_rrc[s].a.be ), + .atop_i ( '0 ), + .we_i ( obi_reorg_req_from_rrc[s].a.we ), + .rvalid_o ( obi_reorg_rsp_to_rrc[s].rvalid ), + .rdata_o ( obi_reorg_rsp_to_rrc[s].r.rdata ), + .bank_req_o ( {tcdm_master[NB_TCDM_PORTS_PER_STRM*s+5].req, tcdm_master[NB_TCDM_PORTS_PER_STRM*s+4].req} ), + .bank_gnt_i ( {tcdm_master[NB_TCDM_PORTS_PER_STRM*s+5].gnt, tcdm_master[NB_TCDM_PORTS_PER_STRM*s+4].gnt} ), + .bank_addr_o ( {tcdm_master[NB_TCDM_PORTS_PER_STRM*s+5].add, tcdm_master[NB_TCDM_PORTS_PER_STRM*s+4].add} ), + .bank_wdata_o ( {tcdm_master[NB_TCDM_PORTS_PER_STRM*s+5].data, tcdm_master[NB_TCDM_PORTS_PER_STRM*s+4].data} ), + .bank_strb_o ( {tcdm_master[NB_TCDM_PORTS_PER_STRM*s+5].be, tcdm_master[NB_TCDM_PORTS_PER_STRM*s+4].be} ), + .bank_atop_o ( /* NOT CONNECTED */ ), + .bank_we_o ( {tcdm_master_we_5, tcdm_master_we_4} ), + .bank_rvalid_i ( {tcdm_master[NB_TCDM_PORTS_PER_STRM*s+5].r_valid,tcdm_master[NB_TCDM_PORTS_PER_STRM*s+4].r_valid} ), + .bank_rdata_i ( {tcdm_master[NB_TCDM_PORTS_PER_STRM*s+5].r_data,tcdm_master[NB_TCDM_PORTS_PER_STRM*s+4].r_data} ) + ); -endmodule : dmac_wrap + assign tcdm_master[NB_TCDM_PORTS_PER_STRM*s+4].boffs = '0; + assign tcdm_master[NB_TCDM_PORTS_PER_STRM*s+4].lrdy = '0; + assign tcdm_master[NB_TCDM_PORTS_PER_STRM*s+4].user = '0; + assign tcdm_master[NB_TCDM_PORTS_PER_STRM*s+5].boffs = '0; + assign tcdm_master[NB_TCDM_PORTS_PER_STRM*s+5].lrdy = '0; + assign tcdm_master[NB_TCDM_PORTS_PER_STRM*s+5].user = '0; + assign tcdm_master[NB_TCDM_PORTS_PER_STRM*s+4].wen = !tcdm_master_we_4; + assign 
tcdm_master[NB_TCDM_PORTS_PER_STRM*s+5].wen = !tcdm_master_we_5; + assign tcdm_master[NB_TCDM_PORTS_PER_STRM*s+4].r_ready = 1'b1; + assign tcdm_master[NB_TCDM_PORTS_PER_STRM*s+5].r_ready = 1'b1; + assign tcdm_master[NB_TCDM_PORTS_PER_STRM*s+4].id = '0; // TODO change? + assign tcdm_master[NB_TCDM_PORTS_PER_STRM*s+5].id = '0; + assign tcdm_master[NB_TCDM_PORTS_PER_STRM*s+4].ecc = '0; + assign tcdm_master[NB_TCDM_PORTS_PER_STRM*s+5].ecc = '0; + end + end else begin : passthrough_obi_to_tcdm + assign tcdm_master[NB_TCDM_PORTS_PER_STRM*s].user = '0; + assign tcdm_master[NB_TCDM_PORTS_PER_STRM*s].req = obi_write_req_from_rrc[s].req; + assign tcdm_master[NB_TCDM_PORTS_PER_STRM*s].wen = !obi_write_req_from_rrc[s].a.we; + assign tcdm_master[NB_TCDM_PORTS_PER_STRM*s].add = obi_write_req_from_rrc[s].a.addr; + assign tcdm_master[NB_TCDM_PORTS_PER_STRM*s].data = obi_write_req_from_rrc[s].a.wdata; + assign tcdm_master[NB_TCDM_PORTS_PER_STRM*s].be = obi_write_req_from_rrc[s].a.be; + assign tcdm_master[NB_TCDM_PORTS_PER_STRM*s].r_ready = obi_write_req_from_rrc[s].rready; + assign tcdm_master[NB_TCDM_PORTS_PER_STRM*s].id = '0; // TODO change? 
+ assign tcdm_master[NB_TCDM_PORTS_PER_STRM*s].ecc = '0; + assign obi_write_rsp_to_rrc[s].gnt = tcdm_master[NB_TCDM_PORTS_PER_STRM*s].gnt; + assign obi_write_rsp_to_rrc[s].rvalid = tcdm_master[NB_TCDM_PORTS_PER_STRM*s].r_valid; + assign obi_write_rsp_to_rrc[s].r.rdata = tcdm_master[NB_TCDM_PORTS_PER_STRM*s].r_data; + assign tcdm_master[NB_TCDM_PORTS_PER_STRM*s+1].user = '0; + assign tcdm_master[NB_TCDM_PORTS_PER_STRM*s+1].req = obi_read_req_from_rrc[s].req; + assign tcdm_master[NB_TCDM_PORTS_PER_STRM*s+1].wen = !obi_read_req_from_rrc[s].a.we; + assign tcdm_master[NB_TCDM_PORTS_PER_STRM*s+1].add = obi_read_req_from_rrc[s].a.addr; + assign tcdm_master[NB_TCDM_PORTS_PER_STRM*s+1].data = obi_read_req_from_rrc[s].a.wdata; + assign tcdm_master[NB_TCDM_PORTS_PER_STRM*s+1].be = obi_read_req_from_rrc[s].a.be; + assign tcdm_master[NB_TCDM_PORTS_PER_STRM*s+1].r_ready = obi_read_req_from_rrc[s].rready; + assign tcdm_master[NB_TCDM_PORTS_PER_STRM*s+1].id = '0; + assign tcdm_master[NB_TCDM_PORTS_PER_STRM*s+1].ecc = '0; + assign obi_read_rsp_to_rrc[s].gnt = tcdm_master[NB_TCDM_PORTS_PER_STRM*s+1].gnt; + assign obi_read_rsp_to_rrc[s].rvalid = tcdm_master[NB_TCDM_PORTS_PER_STRM*s+1].r_valid; + assign obi_read_rsp_to_rrc[s].r.rdata = tcdm_master[NB_TCDM_PORTS_PER_STRM*s+1].r_data; + if (!MUX_READ) begin : passthrough_obi_read + assign tcdm_master[NB_TCDM_PORTS_PER_STRM*s+2].req = obi_reorg_req_from_rrc[s].req; + assign tcdm_master[NB_TCDM_PORTS_PER_STRM*s+2].wen = !obi_reorg_req_from_rrc[s].a.we; + assign tcdm_master[NB_TCDM_PORTS_PER_STRM*s+2].add = obi_reorg_req_from_rrc[s].a.addr; + assign tcdm_master[NB_TCDM_PORTS_PER_STRM*s+2].data = obi_reorg_req_from_rrc[s].a.wdata; + assign tcdm_master[NB_TCDM_PORTS_PER_STRM*s+2].be = obi_reorg_req_from_rrc[s].a.be; + assign tcdm_master[NB_TCDM_PORTS_PER_STRM*s+2].r_ready = obi_reorg_req_from_rrc[s].rready; /* reorg port: rready comes from the reorg channel, like every other signal on port +2 */ + assign tcdm_master[NB_TCDM_PORTS_PER_STRM*s+2].id = '0; + assign tcdm_master[NB_TCDM_PORTS_PER_STRM*s+2].ecc = '0; + assign
obi_reorg_rsp_to_rrc[s].gnt = tcdm_master[NB_TCDM_PORTS_PER_STRM*s+2].gnt; + assign obi_reorg_rsp_to_rrc[s].rvalid = tcdm_master[NB_TCDM_PORTS_PER_STRM*s+2].r_valid; + assign obi_reorg_rsp_to_rrc[s].r.rdata = tcdm_master[NB_TCDM_PORTS_PER_STRM*s+2].r_data; + assign tcdm_master[NB_TCDM_PORTS_PER_STRM*s+2].user = '0; + end + end + end +endmodule +`undef MY_MAX diff --git a/rtl/pulp_cluster.sv b/rtl/pulp_cluster.sv index b8694ff7..e00c0774 100644 --- a/rtl/pulp_cluster.sv +++ b/rtl/pulp_cluster.sv @@ -113,9 +113,6 @@ module pulp_cluster input logic pwr_on_rst_ni, input logic pmu_mem_pwdn_i, - - input logic [3:0] base_addr_i, - input logic test_mode_i, input logic en_sa_boot_i, @@ -203,7 +200,7 @@ module pulp_cluster output logic [Cfg.AxiCdcLogDepth:0] async_data_master_b_rptr_o ); -//Ensure that the input AXI ID width is big enough to accomodate the accomodate the IDs of internal wiring +//Ensure that the input AXI ID width is big enough to accomodate the IDs of internal wiring if (Cfg.AxiIdInWidth < 1 + $clog2(Cfg.iCacheNumBanks)) $info("AXI input ID width must be larger than 1+$clog2(Cfg.iCacheNumBanks) which is %d but was %d", 1 + $clog2(Cfg.iCacheNumBanks), Cfg.AxiIdInWidth); @@ -231,6 +228,8 @@ logic [Cfg.NumCores-1:0] s_dbg_irq; logic s_hwpe_en; logic [$clog2(MAX_NUM_HWPES)-1:0] s_hwpe_sel; +logic s_idma_en; + logic fetch_en_synch; logic en_sa_boot_synch; logic axi_isolate_synch; @@ -305,7 +304,7 @@ localparam hci_package::hci_size_parameter_t HciCoreSizeParam = '{ EHW: DEFAULT_EHW }; localparam hci_package::hci_size_parameter_t HciHwpeSizeParam = '{ - DW: Cfg.HwpeNumPorts * DataWidth, + DW: (Cfg.HwpePresent) ? 
Cfg.HwpeNumPorts * DataWidth : 1, AW: AddrWidth, BW: DEFAULT_BW, UW: DEFAULT_UW, @@ -523,7 +522,7 @@ hci_core_intf #( .init_no ( s_init_n ) ); -/* fetch & busy genertion */ +/* fetch & busy generation */ assign s_cluster_int_busy = s_cluster_periphs_busy | s_per2axi_busy | s_axi2per_busy | s_axi2mem_busy | s_dmac_busy | s_hwpe_busy; assign busy_o = s_cluster_int_busy | (|core_busy); assign fetch_en_int = fetch_enable_reg_int; @@ -704,6 +703,26 @@ cluster_interconnect_wrap #( .TCDM_arb_policy_i ( s_TCDM_arb_policy ) ); +//*************************************************** +//****************iDMA Clock Gating****************** +//*************************************************** +//***************************************************** +// CONTROL CLOCK GATING CELL --> This clock gating cell +// handles the clock gating control signal coming from +// the cluster control unit, completely disabling the +// clock in the idma wrapper +//***************************************************** + +`ifdef TARGET_IDMA +logic idma_clk_gated; +cluster_clock_gating idma_ctrl_ckgate ( + .clk_i ( clk_i ), + .en_i ( s_idma_en ), + .test_en_i ( test_mode_i ), + .clk_o ( idma_clk_gated ) + ); +`endif + //*************************************************** //*********************DMAC WRAP********************* //*************************************************** @@ -722,16 +741,22 @@ dmac_wrap #( .axi_resp_t ( c2s_in_int_resp_t ), `ifdef TARGET_MCHAN .NB_CTRLS ( Cfg.NumCores + 2 ), - .MCHAN_BURST_LENGTH ( Cfg.DmaBurstLength ), + .MCHAN_BURST_LENGTH ( 8 * (1 << Cfg.DmaBurstLength) ), .TCDM_ADD_WIDTH ( TcdmAddrWidth ) `else .NB_PE_PORTS ( 2 ), - .NUM_STREAMS ( 4 ), - .TCDM_SIZE ( Cfg.TcdmSize ), - .ClusterBaseAddr ( Cfg.ClusterBaseAddr ) + .NUM_BIDIR_STREAMS ( 1 ), + .GLOBAL_QUEUE_DEPTH ( 8 ), + .MUX_READ ( 1'b1 ), + .TCDM_MEM2BANKS ( 1 ), + .IDMA_BURST_LENGTH ( Cfg.DmaBurstLength ) `endif ) dmac_wrap_i ( +`ifdef TARGET_IDMA + .clk_i ( idma_clk_gated ), +`else .clk_i ( clk_i ), +`endif 
.rst_ni ( rst_ni ), .test_mode_i ( test_mode_i ), .pe_ctrl_slave ( s_periph_dma_bus[1:0] ), @@ -793,8 +818,6 @@ cluster_peripherals #( .dma_irq_i ( s_dma_irq ), .mbox_irq_i ( mbox_irq_synch ), - // NEW_SIGNALS .decompr_done_evt_i ( s_decompr_done_evt ), - .dma_fc_event_i ( s_dma_fc_event ), .dma_fc_irq_i ( '0 ), @@ -829,6 +852,7 @@ cluster_peripherals #( .hwpe_events_i ( s_hwpe_remap_evt ), .hwpe_en_o ( s_hwpe_en ), .hwpe_sel_o ( s_hwpe_sel ), + .idma_en_o ( s_idma_en ), .hci_ctrl_o ( s_hci_ctrl ), .enable_l1_l15_prefetch_o ( s_enable_l1_l15_prefetch ), .flush_valid_o ( s_icache_flush_valid ), @@ -923,6 +947,7 @@ generate .APU_NDSFLAGS_CPU ( FpuInFlagsWidth ), .APU_NUSFLAGS_CPU ( FpuOutFlagsWidth ), .DEBUG_START_ADDR ( Cfg.DmBaseAddr ), + .CLUSTER_BASE ( Cfg.ClusterBaseAddr ), .FPU ( Cfg.EnablePrivateFpu ), .FP_DIVSQRT ( Cfg.EnablePrivateFpDivSqrt ), .core_data_req_t ( core_data_req_t ), @@ -1018,6 +1043,7 @@ generate .RemapAddress ( Cfg.EnableRemapAddress ), .ClustAlias ( Cfg.ClusterAlias ), .ClustAliasBase ( Cfg.ClusterAliasBase ), + .ClustBaseAddr ( Cfg.ClusterBaseAddr ), .NumExtPerf ( 5 ), .core_data_req_t ( core_data_req_t ), .core_data_rsp_t ( core_data_rsp_t ) @@ -1026,7 +1052,6 @@ generate .rst_ni ( rst_ni ), .test_en_i ( test_mode_i ), .clk_en_i ( clk_core_en[i] ), - .base_addr_i ( base_addr_i ), .ext_perf_o ( ext_perf[i] ), .core_data_req_i ( demux_data_req[i] ), .core_data_rsp_o ( demux_data_rsp[i] ), @@ -1038,40 +1063,53 @@ generate end endgenerate -logic [Cfg.NumCores/3-1:0] hmr_tmr_synch; -for (genvar i = 0; i < Cfg.NumCores/3; i++) begin - if (1'b1) begin // InterleaveGrps - assign hmr_tmr_synch[i] = hmr_barrier_matched[i + 1]; - end else begin - assign hmr_tmr_synch[i] = hmr_barrier_matched[i + i/2 + 1]; - end -end +generate + if (Cfg.HMRPresent) begin : gen_hmr_unit -logic [Cfg.NumCores/3-1:0] hmr_tmr_sw_resynch_req_short; -logic [Cfg.NumCores/2-1:0] hmr_dmr_sw_resynch_req_short; -always_comb begin - hmr_tmr_sw_resynch_req = '0; - 
hmr_dmr_sw_resynch_req = '0; - - for (int i = 0; i < Cfg.NumCores/3; i++) begin - if (1'b1) begin // InterleaveGrps - hmr_tmr_sw_resynch_req[i] = hmr_tmr_sw_resynch_req_short[i]; - end else begin - hmr_tmr_sw_resynch_req[3*i] = hmr_tmr_sw_resynch_req_short[i]; + localparam int unsigned NumTMRGroups = Cfg.HMRTmrEnabled ? NumCores/3 : 1; + localparam int unsigned NumDMRGroups = Cfg.HMRDmrEnabled ? NumCores/2 : 1; + + logic [NumTMRGroups-1:0] hmr_tmr_synch; + logic [NumTMRGroups-1:0] hmr_tmr_sw_resynch_req_short; + logic [NumDMRGroups-1:0] hmr_dmr_sw_resynch_req_short; + + + if (Cfg.HMRTmrEnabled) begin : gen_hmr_tmr_synch + for (genvar i = 0; i < Cfg.NumCores/3; i++) begin + if (1'b1) begin // InterleaveGrps + assign hmr_tmr_synch[i] = hmr_barrier_matched[i + 1]; + end else begin + assign hmr_tmr_synch[i] = hmr_barrier_matched[i + i/2 + 1]; + end + end + + always_comb begin + hmr_tmr_sw_resynch_req = '0; + for (int i = 0; i < Cfg.NumCores/3; i++) begin + if (1'b1) begin // InterleaveGrps + hmr_tmr_sw_resynch_req[i] = hmr_tmr_sw_resynch_req_short[i]; + end else begin + hmr_tmr_sw_resynch_req[3*i] = hmr_tmr_sw_resynch_req_short[i]; + end + end + end + + end else begin : gen_no_hmr_tmr_synch + assign hmr_tmr_synch = '0; + assign hmr_tmr_sw_resynch_req = '0; end - end - for (int i = 0; i < Cfg.NumCores/2; i++) begin - if (1'b1) begin // InterleaveGrps - hmr_dmr_sw_resynch_req[i] = hmr_dmr_sw_resynch_req_short[i]; - end else begin - hmr_dmr_sw_resynch_req[2*i] = hmr_dmr_sw_resynch_req_short[i]; + always_comb begin + hmr_dmr_sw_resynch_req = '0; + for (int i = 0; i < Cfg.NumCores/2; i++) begin + if (1'b1) begin // InterleaveGrps + hmr_dmr_sw_resynch_req[i] = hmr_dmr_sw_resynch_req_short[i]; + end else begin + hmr_dmr_sw_resynch_req[2*i] = hmr_dmr_sw_resynch_req_short[i]; + end + end end - end -end -generate - if (Cfg.HMRPresent) begin : gen_hmr_unit hmr_unit #( .NumCores ( Cfg.NumCores ), .DMRSupported ( Cfg.HMRDmrEnabled ), @@ -1124,28 +1162,36 @@ generate 
.core_bus_outputs_i ( '0 ), .core_axi_outputs_i ( '0 ) ); + + `ifndef VERILATOR + initial begin: p_assertions + assert (Cfg.HMRPresent && (Cfg.HMRDmrEnabled || Cfg.HMRTmrEnabled)) + else $fatal(1, "Either DMR or TMR must be enabled when HMR is present!"); + end + `endif + end else begin : gen_no_hmr_unit assign hmr_reg_rsp = '0; - assign hmr_tmr_sw_resynch_req_short = '0; assign hmr_tmr_sw_synch_req = '0; - assign hmr_dmr_sw_resynch_req_short = '0; + assign hmr_tmr_sw_resynch_req = '0; + assign hmr_dmr_sw_resynch_req = '0; assign hmr_dmr_sw_synch_req = '0; assign recovery_bus = '0; assign setback = '0; for (genvar i = 0; i < Cfg.NumCores; i++) begin - assign hmr2core[i].clock_en = sys2hmr[i].clock_en; - assign hmr2core[i].boot_addr = sys2hmr[i].boot_addr; - assign hmr2core[i].core_id = sys2hmr[i].core_id; - assign hmr2core[i].cluster_id = sys2hmr[i].cluster_id; - assign hmr2core[i].instr_gnt = sys2hmr[i].instr_gnt; - assign hmr2core[i].instr_rvalid = sys2hmr[i].instr_rvalid; - assign hmr2core[i].instr_rdata = sys2hmr[i].instr_rdata; - assign hmr2core[i].data_gnt = sys2hmr[i].data_gnt; - assign hmr2core[i].data_rvalid = sys2hmr[i].data_rvalid; - assign hmr2core[i].data_rdata = sys2hmr[i].data_rdata; - assign hmr2core[i].irq_req = sys2hmr[i].irq_req; - assign hmr2core[i].irq_id = sys2hmr[i].irq_id; + assign hmr2core[i].clock_en = sys2hmr[i].clock_en; + assign hmr2core[i].boot_addr = sys2hmr[i].boot_addr; + assign hmr2core[i].core_id = sys2hmr[i].core_id; + assign hmr2core[i].cluster_id = sys2hmr[i].cluster_id; + assign hmr2core[i].instr_gnt = sys2hmr[i].instr_gnt; + assign hmr2core[i].instr_rvalid = sys2hmr[i].instr_rvalid; + assign hmr2core[i].instr_rdata = sys2hmr[i].instr_rdata; + assign hmr2core[i].data_gnt = sys2hmr[i].data_gnt; + assign hmr2core[i].data_rvalid = sys2hmr[i].data_rvalid; + assign hmr2core[i].data_rdata = sys2hmr[i].data_rdata; + assign hmr2core[i].irq_req = sys2hmr[i].irq_req; + assign hmr2core[i].irq_id = sys2hmr[i].irq_id; assign 
hmr2sys[i].instr_req = core2hmr[i].instr_req; assign hmr2sys[i].instr_addr = core2hmr[i].instr_addr; @@ -1183,12 +1229,12 @@ begin assign s_apu_master_rflags[k] = s_apu__rflags[k]; end -// At the moment, the cluster does not support any shared execution unit +// At the moment, the cluster does not support any shared execution unit assign s_apu_master_gnt = '0; assign s_apu_master_rvalid = '0; assign s_apu_master_rdata = '0; assign s_apu__rflags = '0; - + //************************************************************** //**** HW Processing Engines / Cluster-Coupled Accelerators **** //************************************************************** @@ -1702,7 +1748,7 @@ axi_cdc_dst #( ); // If the AXI ID width of the subordinate port does not match the one required, we interpose -// an AXI ID remapper. Otherwise the busses are simply assigned. +// an AXI ID remapper. Otherwise the buses are simply assigned. `AXI_TYPEDEF_AW_CHAN_T(s2c_remap_aw_chan_t,logic[Cfg.AxiAddrWidth-1:0],logic[AxiIdInWidth-1:0],logic[Cfg.AxiUserWidth-1:0]) `AXI_TYPEDEF_W_CHAN_T(s2c_remap_w_chan_t,logic[Cfg.AxiDataInWidth-1:0],logic[Cfg.AxiDataInWidth/8-1:0],logic[Cfg.AxiUserWidth-1:0]) `AXI_TYPEDEF_B_CHAN_T(s2c_remap_b_chan_t,logic[AxiIdInWidth-1:0],logic[Cfg.AxiUserWidth-1:0]) diff --git a/rtl/pulp_cluster_wrap.sv b/rtl/pulp_cluster_wrap.sv index 25d1e700..b71b242b 100644 --- a/rtl/pulp_cluster_wrap.sv +++ b/rtl/pulp_cluster_wrap.sv @@ -66,7 +66,6 @@ module pulp_cluster_wrap ( input logic ref_clk_i, input logic pwr_on_rst_ni, input logic pmu_mem_pwdn_i, - input logic [3:0] base_addr_i, input logic test_mode_i, input logic en_sa_boot_i, input logic [5:0] cluster_id_i, @@ -140,7 +139,6 @@ module pulp_cluster_wrap ( .axi_isolate_i ( '0 ), .axi_isolated_o, .pmu_mem_pwdn_i ( 1'b0 ), - .base_addr_i, .dma_pe_evt_ack_i ( '1 ), .dma_pe_evt_valid_o, .dma_pe_irq_ack_i ( 1'b1 ), diff --git a/scripts/run_and_exit.tcl b/scripts/run_and_exit.tcl index 801e0065..34c9eacc 100644 --- a/scripts/run_and_exit.tcl 
+++ b/scripts/run_and_exit.tcl @@ -7,12 +7,14 @@ if {![info exists APP]} { set APP "./build/test/test" } -if {![info exists VSIM]} { +if {[info exists USE_QONE] && $USE_QONE == 1} { + set QSIM qsim + $QSIM -qwavedb=+signal+memory +permissive -suppress 3053 -suppress 8885 -suppress 12130 -lib $VSIM_PATH/work +APP=./build/test/test +notimingchecks +nospecify -t 1ps pulp_cluster_tb_optimized +permissive-off ++./build/test/test +} else { set VSIM vsim + $VSIM +permissive -suppress 3053 -suppress 8885 -suppress 12130 -suppress 7077 -lib $VSIM_PATH/work +APP=./build/test/test +notimingchecks +nospecify -t 1ps pulp_cluster_tb_optimized +permissive-off ++./build/test/test } -$VSIM +permissive -suppress 3053 -suppress 8885 -suppress 12130 -suppress 7077 -lib $VSIM_PATH/work +APP=./build/test/test +notimingchecks +nospecify -t 1ps pulp_cluster_tb_optimized +permissive-off ++./build/test/test - proc run_and_exit {} { run -all quit -code [examine -radix decimal sim:/pulp_cluster_tb/ret_val(30:0)] diff --git a/scripts/start.tcl b/scripts/start.tcl index 97053ab6..957dd8b0 100644 --- a/scripts/start.tcl +++ b/scripts/start.tcl @@ -3,20 +3,15 @@ if {![info exists VSIM_PATH ]} { set VSIM_PATH "" } -if {![info exists VSIM]} { - set VSIM vsim +if {[info exists USE_QONE] && $USE_QONE == 1} { + qsim -qwavedb=+signal+memory +permissive -suppress 3053 -suppress 8885 -suppress 12130 -lib $VSIM_PATH/work +APP=./build/test/test +notimingchecks +nospecify -t 1ps pulp_cluster_tb_optimized +permissive-off ++./build/test/test +} else { + vsim +permissive -suppress 3053 -suppress 8885 -suppress 12130 -lib $VSIM_PATH/work +APP=./build/test/test +notimingchecks +nospecify -t 1ps pulp_cluster_tb_optimized +permissive-off ++./build/test/test } -$VSIM +permissive -suppress 3053 -suppress 8885 -suppress 12130 -suppress 7077 -lib $VSIM_PATH/work +APP=./build/test/test +notimingchecks +nospecify -t 1ps pulp_cluster_tb_optimized +permissive-off ++./build/test/test - if {[info exists 
::env(FAULT_INJECTION)]} { if {![info exists ::env(FAULT_INJECTION_SCRIPT)]} { error "Error: Missing FAULT_INJECTION_SCRIPT to source!" } source $::env(FAULT_INJECTION_SCRIPT) } - -source $VSIM_PATH/scripts/wave.tcl - -add log -r /* -run -all diff --git a/tb/pulp_cluster_tb.sv b/tb/pulp_cluster_tb.sv index 054341c1..74babb36 100644 --- a/tb/pulp_cluster_tb.sv +++ b/tb/pulp_cluster_tb.sv @@ -14,7 +14,7 @@ // the virtual interfaces and starts the test passed by +UVM_TEST+ //`define TEST_CLOCK_BYPASS -`timescale 1ps/1ps +`timescale 1ns/1ps `include "pulp_soc_defines.sv" `include "axi/assign.svh" @@ -35,9 +35,9 @@ module pulp_cluster_tb; logic s_rstn; logic s_rstn_cl; - localparam time SYS_TCK = 8ns; - localparam time SYS_TA = 2ns; - localparam time SYS_TT = SYS_TCK - 2ns; + localparam time SYS_TCK = 2ns; + localparam time SYS_TA = 0.5ns; + localparam time SYS_TT = SYS_TCK - 0.5ns; clk_rst_gen #( .ClkPeriod ( SYS_TCK ), @@ -64,6 +64,8 @@ module pulp_cluster_tb; localparam bit[AxiAw-1:0] ClustBaseAddr = ClustBase - (ClustIdx << 22); localparam bit[AxiAw-1:0] L2BaseAddr = 'h1C000000; localparam bit[AxiAw-1:0] L2Size = 'h00100000; + localparam bit[AxiAw-1:0] UartBaseAddr = 'h40000000; + localparam bit[AxiAw-1:0] UartSize = 'h1000; localparam bit[AxiAw-1:0] BootAddr = L2BaseAddr + 'h8080; localparam bit[AxiAw-1:0] ClustReturnInt = ClustBase + ClustPeriphOffs + 'h100; @@ -177,12 +179,12 @@ module pulp_cluster_tb; ); mock_uart_axi #( - .AxiIw ( AxiIwMst ), - .AxiAw ( AxiAw ), - .AxiDw ( AxiDw ), - .AxiUw ( AxiUw ), - .N_CORES ( 8 ), - .BaseAddr( 32'h4000_0000 ) + .AxiIw ( AxiIwMst ), + .AxiAw ( AxiAw ), + .AxiDw ( AxiDw ), + .AxiUw ( AxiUw ), + .N_CORES ( `NB_CORES ), + .BaseAddr( UartBaseAddr ) ) i_mock_uart ( .clk_i ( s_clk ), .rst_ni ( s_rstn ), @@ -202,8 +204,8 @@ module pulp_cluster_tb; rule_t [NumRules-1:0] addr_map; assign addr_map[0] = '{ // UART idx: 0, - start_addr: 'h03002000, - end_addr: 'h03003000 + start_addr: UartBaseAddr, + end_addr: UartBaseAddr + UartSize 
}; assign addr_map[1] = '{ // 512KiB L2SPM idx: 1, @@ -292,14 +294,14 @@ module pulp_cluster_tb; NumCores: `NB_CORES, DmaNumPlugs: `NB_DMAS, DmaNumOutstandingBursts: 8, - DmaBurstLength: 256, + DmaBurstLength: 5, NumMstPeriphs: `NB_MPERIPHS, NumSlvPeriphs: `NB_SPERIPHS, ClusterAlias: 1, ClusterAliasBase: 'h0, NumSyncStages: 3, UseHci: 1, - TcdmSize: 128*1024, + TcdmSize: 256*1024, TcdmNumBank: 16, HwpePresent: 1, HwpeCfg: '{NumHwpes: 3, HwpeList: {SOFTEX, NEUREKA, REDMULE}}, @@ -323,7 +325,7 @@ module pulp_cluster_tb; iCachePrivateSize: 512, iCachePrivateDataWidth: 32, EnableReducedTag: 1, - L2Size: 1000*1024, + L2Size: 512*1024, DmBaseAddr: 'h60203000, BootRomBaseAddr: BootAddr, BootAddr: BootAddr, @@ -351,10 +353,12 @@ module pulp_cluster_tb; }; pulp_cluster +`ifndef CLUSTER_NETLIST `ifdef USE_PULP_PARAMETERS #( .Cfg ( PulpClusterCfg ) ) +`endif `endif cluster_i ( .clk_i ( s_clk ), @@ -366,8 +370,6 @@ module pulp_cluster_tb; .pmu_mem_pwdn_i ( 1'b0 ), - .base_addr_i ( ClustBase[31:28] ), - .dma_pe_evt_ack_i ( '1 ), .dma_pe_evt_valid_o ( ), @@ -580,5 +582,38 @@ module pulp_cluster_tb; end +/************** + * VCD Dump * + **************/ + +`ifdef VCD_DUMP + initial begin: vcd_dump + string vcd_dump_file; + + // Wait for the reset + wait (s_rstn); + + // Wait until the probe is high + while (!s_cluster_fetch_en) + @(posedge s_clk); + + if ( $value$plusargs ("VCD_DUMP_FILE=%s", vcd_dump_file)); + $display("[TB] Dumping VCD in %s", vcd_dump_file); + + $dumpfile(vcd_dump_file); + $dumpvars(0, cluster_i); + $dumpon; + + // Wait until the probe is low + while (s_cluster_fetch_en) + @(posedge s_clk); + + $dumpoff; + + // Stop the execution + $finish(0); + end: vcd_dump +`endif + endmodule : pulp_cluster_tb