Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 8 additions & 1 deletion hdl/projects/cosmo_seq/dimms_subsystem/BUCK
Original file line number Diff line number Diff line change
Expand Up @@ -56,7 +56,7 @@ vunit_sim(

vunit_sim(
name = "spd_proxy_top_tb",
srcs = glob(["sims/*.vhd"], exclude = ["sims/*tb_pkg.vhd"]),
srcs = glob(["sims/*.vhd"], exclude = ["sims/*tb_pkg.vhd", "sims/dimm_arb_mux_tb.vhd"]),
deps = [
":dimms_subsystem_top",
":spd_shared_sim_pkg",
Expand All @@ -66,4 +66,11 @@ vunit_sim(
"//hdl/ip/vhd/vunit_components:i2c_controller_vc"
],
visibility = ['PUBLIC'],
)

vunit_sim(
name = "dimm_arb_mux_tb",
srcs = ["sims/dimm_arb_mux_tb.vhd"],
deps = [":dimms_subsystem_top"],
visibility = ['PUBLIC'],
)
Original file line number Diff line number Diff line change
Expand Up @@ -66,7 +66,6 @@ entity dimm_arb_mux is

fpga_i2c_has_bus : out std_logic;
sp5_playback_i2c_has_bus : out std_logic;
forced_idle_delay : out std_logic;
sp5_i2c_has_bus : out std_logic;

);
Expand Down Expand Up @@ -125,13 +124,11 @@ architecture rtl of dimm_arb_mux is
state : sample_state_t;
data_bits : unsigned(6 downto 0);
bit_count : integer range 0 to 7;
allowed_to_handoff : std_logic;
end record;
constant SAMPLE_REG_RESET : sample_reg_t := (
state => SAMPLE_IDLE,
data_bits => (others => '0'),
bit_count => 0,
allowed_to_handoff => '0'
bit_count => 0
);
signal sample_r, sample_rin : sample_reg_t;
signal dimm_i2c_idle : std_logic;
Expand Down Expand Up @@ -160,7 +157,6 @@ begin
mux_r.state = POWER_UP_CLEAR or
mux_r.state = ENSURE_PLAYBACK_HOLD else '0';
sp5_i2c_has_bus <= '1' when mux_r.state = CPU_HAS_BUS else '0';
forced_idle_delay <= '1' when mux_r.state = BUS_IDLE_DELAY else '0';
fpga_i2c_abort_or_finish <= mux_r.fpga_abort_or_finish;

-- need to enforce minimum idle time on the bus before we can
Expand Down Expand Up @@ -232,6 +228,7 @@ begin
when SAMPLE_ERROR =>
-- Unexpected state, go back to idle
v.state := SAMPLE_IDLE;
v.bit_count := 0;
end case;

sample_rin <= v;
Expand Down Expand Up @@ -305,10 +302,11 @@ begin
when PLAY_STORED_START =>
-- play back the stored start condition
-- if the CPU is still in the start condition but SCL is still high
-- just hand over the bus immediately. We'll have generated start already here.
-- go through ENSURE_PLAYBACK_HOLD to satisfy I2C t_HD;STA hold time.
v.dimm_sda_oe := '1'; --generate a start by pulling sda low
if sample_r.state = SAMPLE_START and cpu_scl_in = '1' then
v.state := CPU_HAS_BUS;
v.state := ENSURE_PLAYBACK_HOLD;
v.hold_timer := 0;
v.playback_done := '1';
elsif playback_scl_fedge = '1' then
if sample_r.state = SAMPLE_DATA and sample_r.bit_count > 0 then
Expand All @@ -331,22 +329,13 @@ begin
-- We're going to play back one bit per synthesized scl rising edge until we catch
-- up with the sampled bits. We'll to CPU_HAS_BUS only if we've caught up
-- and CPU's SCL is low so that the SDA handoff is clean.

if playback_scl_redge = '1' then
v.playback_bits := mux_r.playback_bits + 1;
if v.playback_bits = sample_r.bit_count and cpu_scl_in = '1' then
v.state := ENSURE_PLAYBACK_HOLD;
v.playback_done := '1';
v.playback_bits := 0;
v.hold_timer := 0;
-- prevent small glitches or arbitration oddities. Since we've caught up
-- and are at a scl fedge, match the CPU's sda line for a smooth transition
v.dimm_sda_oe := not cpu_sda_in;
end if;
elsif playback_scl_fedge = '1' and mux_r.playback_bits < sample_r.bit_count then
-- oe = 1 is output = 0 so there's an inversion here.
v.dimm_sda_oe := not sample_r.data_bits(mux_r.playback_bits);
elsif playback_scl_fedge = '1' and mux_r.playback_bits = sample_r.bit_count and cpu_scl_in = '0' then
elsif playback_scl_fedge = '1' and mux_r.playback_bits = sample_r.bit_count then
-- we've played all the bits we have, just float sda until we can hand off
v.state := ENSURE_PLAYBACK_HOLD;
v.playback_done := '1';
Expand Down
249 changes: 249 additions & 0 deletions hdl/projects/cosmo_seq/dimms_subsystem/sims/dimm_arb_mux_tb.vhd
Original file line number Diff line number Diff line change
@@ -0,0 +1,249 @@
-- This Source Code Form is subject to the terms of the Mozilla Public
-- License, v. 2.0. If a copy of the MPL was not distributed with this
-- file, You can obtain one at https://mozilla.org/MPL/2.0/.
--
-- Copyright 2025 Oxide Computer Company
Copy link
Copy Markdown
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
-- Copyright 2025 Oxide Computer Company
-- Copyright 2026 Oxide Computer Company

Do we need this statement even? Over in hubris we don't have it and just use those first 3 MPL lines.


-- Unit-level testbench for dimm_arb_mux.
--
-- Bug 1 cannot be triggered at the integration level: the FPGA abort always finishes
-- before the CPU's first SCL falling edge (~2-3 µs vs the 5 µs STANDARD half-period),
-- so PLAY_STORED_START is entered while sample_r.state=SAMPLE_START and Bug 3 fires
-- instead. Direct stimulus control lets us reach PLAY_STORED_DATA with bit_count=1
-- while keeping cpu_scl_in='1' through the catch-up rising edge.

library ieee;
use ieee.std_logic_1164.all;

library vunit_lib;
context vunit_lib.vunit_context;

use work.i2c_common_pkg.all;
use work.tristate_if_pkg.all;

entity dimm_arb_mux_tb is
generic (runner_cfg : string);
end entity;

architecture tb of dimm_arb_mux_tb is
constant CLK_PER_NS : positive := 8;
constant I2C_MODE : mode_t := FAST_PLUS;

signal clk : std_logic := '0';
signal reset : std_logic := '1';

signal cpu_scl_in : std_logic := '1';
signal cpu_scl_fedge : std_logic := '0';
signal cpu_scl_redge : std_logic := '0';
signal cpu_sda_in : std_logic := '1';
signal cpu_sda_fedge : std_logic := '0';
signal cpu_sda_redge : std_logic := '0';

-- dimm_scl/sda are the inputs the bus_idle_monitor watches.
-- Keep them both high so dimm_i2c_idle asserts quickly.
signal dimm_sda : std_logic := '1';
signal dimm_scl : std_logic := '1';

signal bus_request : std_logic;
signal bus_grant : std_logic := '0';
signal fpga_i2c_grant : std_logic := '0';
signal fpga_i2c_abort_or_finish : std_logic;

signal playback_sda_if : tristate;
signal playback_scl_if : tristate;

signal fpga_i2c_has_bus : std_logic;
signal sp5_playback_i2c_has_bus : std_logic;
signal sp5_i2c_has_bus : std_logic;
begin

clk <= not clk after 4 ns;

-- Provide idle bus feedback to the DUT (not functionally used by dimm_arb_mux
-- for these ports, but must be driven to avoid 'U' in the model).
playback_sda_if.i <= '1';
playback_scl_if.i <= '1';

DUT: entity work.dimm_arb_mux
generic map (
CLK_PER_NS => CLK_PER_NS,
I2C_MODE => I2C_MODE
)
port map (
clk => clk,
reset => reset,
in_a0 => '1',
cpu_scl_in => cpu_scl_in,
cpu_scl_fedge => cpu_scl_fedge,
cpu_scl_redge => cpu_scl_redge,
cpu_sda_in => cpu_sda_in,
cpu_sda_fedge => cpu_sda_fedge,
cpu_sda_redge => cpu_sda_redge,
dimm_sda => dimm_sda,
dimm_scl => dimm_scl,
bus_request => bus_request,
bus_grant => bus_grant,
fpga_i2c_grant => fpga_i2c_grant,
fpga_i2c_abort_or_finish => fpga_i2c_abort_or_finish,
playback_sda => playback_sda_if,
playback_scl => playback_scl_if,
fpga_i2c_has_bus => fpga_i2c_has_bus,
sp5_playback_i2c_has_bus => sp5_playback_i2c_has_bus,
sp5_i2c_has_bus => sp5_i2c_has_bus
);

-- Once the intentional start condition has been generated (the first SDA change
-- while the playback SCL is high), no further SDA transition is legal while SCL
-- is still high. Any such change is a spurious start or stop on the DIMM bus.
sda_glitch_monitor: process
variable start_seen : boolean := false;
begin
loop
wait on playback_sda_if.oe;
if sp5_playback_i2c_has_bus = '1' and playback_scl_if.o = '1' then
if not start_seen then
start_seen := true;
else
check_false(true,
"Bug 1: SDA changed while playback SCL was high -- " &
"spurious start/stop condition on DIMM bus");
end if;
end if;
end loop;
wait;
end process;

bench: process
procedure clk_tick is begin wait until rising_edge(clk); end procedure;
begin
test_runner_setup(runner, runner_cfg);

while test_suite loop
if run("arb_bug1_sda_glitch_during_playback") then
-- Release reset, wait for POWER_UP_CLEAR (~9 × 1000 ns at FAST_PLUS)
-- and for dimm_i2c_idle to assert (~504 ns with both dimm lines held high).
reset <= '1';
clk_tick;
reset <= '0';
wait for 10 us;

-- Step 1: CPU start condition — SDA falls while SCL is high.
-- cpu_start_detected fires → mux: IDLE → CPU_REQ_GRANT.
-- sample: SAMPLE_IDLE → SAMPLE_START.
cpu_scl_in <= '1';
cpu_sda_in <= '0';
cpu_sda_fedge <= '1';
clk_tick;
cpu_sda_fedge <= '0';
clk_tick;

-- Step 2: CPU SCL falling edge.
-- sample: SAMPLE_START → SAMPLE_DATA (bit_count still 0).
cpu_scl_in <= '0';
cpu_scl_fedge <= '1';
clk_tick;
cpu_scl_fedge <= '0';
clk_tick;

-- Step 3: CPU SCL rising edge; sample first bit (sda_in='0').
-- sample: bit_count becomes 1, data_bits(0)='0'.
cpu_scl_in <= '1';
cpu_scl_redge <= '1';
clk_tick;
cpu_scl_redge <= '0';
clk_tick;

-- Step 4: Grant the bus.
-- sample_r.state=SAMPLE_DATA (not SAMPLE_START) → Bug 3 cannot fire
-- even though cpu_scl_in='1'.
-- mux: CPU_REQ_GRANT → PLAY_STORED_START (dimm_i2c_idle='1' already).
bus_grant <= '1';
wait until sp5_playback_i2c_has_bus = '1';

-- Step 5: Hold cpu_scl_in='1' and set cpu_sda_in='1' so the catch-up
-- REDGE produces a visible SDA change.
--
-- data_bits(0)='0' → playback currently drives SDA low (oe='1').
-- Bug 1 will set oe := not(cpu_sda_in) = not('1') = '0' on the catch-up
-- REDGE, releasing SDA while the playback SCL is still high.
--
-- Timeline from PLAY_STORED_START entry (playback SCL starts at '1'):
-- T + ~500 ns : first playback FEDGE → enter PLAY_STORED_DATA,
-- dimm_sda_oe set to not(data_bits(0))='1' (no change)
-- T + ~1000 ns: first playback REDGE, playback_bits=1=bit_count,
-- cpu_scl_in='1' → Bug 1 exits to ENSURE_PLAYBACK_HOLD,
-- schedules dimm_sda_oe='0' for next cycle
-- T + ~1008 ns: playback_sda_if.oe '1'→'0' while playback_scl_if.o='1'
-- → sda_glitch_monitor fires check_false → test fails
cpu_sda_in <= '1';
wait for 2 us;

bus_grant <= '0';
wait for 500 ns;

elsif run("arb_bug2_playback_stall") then
-- Direct stimulus reproduction of Bug 2.
--
-- Bug 2: in PLAY_STORED_DATA, when playback_bits catches up to bit_count
-- at a falling playback SCL edge while cpu_scl_in='1', the exit condition
-- `playback_scl_fedge='1' and playback_bits=bit_count and cpu_scl_in='0'`
-- is FALSE because cpu_scl_in is '1'. The following REDGE overshoots to
-- playback_bits=2 > bit_count=1, and no subsequent condition ever matches,
-- so the machine spins in PLAY_STORED_DATA forever.
--
-- The setup is identical to the Bug 1 test (SAMPLE_DATA, bit_count=1) but
-- the check monitors sp5_i2c_has_bus rather than SDA. With Bug 2 fixed the
-- mux exits to ENSURE_PLAYBACK_HOLD → CPU_HAS_BUS within ~2 µs.
reset <= '1';
clk_tick;
reset <= '0';
wait for 10 us;

-- CPU start condition: SAMPLE_IDLE → SAMPLE_START, mux: IDLE → CPU_REQ_GRANT.
cpu_scl_in <= '1';
cpu_sda_in <= '0';
cpu_sda_fedge <= '1';
clk_tick;
cpu_sda_fedge <= '0';
clk_tick;

-- CPU SCL falling edge: SAMPLE_START → SAMPLE_DATA (bit_count=0).
cpu_scl_in <= '0';
cpu_scl_fedge <= '1';
clk_tick;
cpu_scl_fedge <= '0';
clk_tick;

-- CPU SCL rising edge: sample first bit ('0'), bit_count becomes 1.
cpu_scl_in <= '1';
cpu_scl_redge <= '1';
clk_tick;
cpu_scl_redge <= '0';
clk_tick;

-- Grant bus with sample_r.state=SAMPLE_DATA (not SAMPLE_START) so Bug 3
-- cannot fire. Mux enters PLAY_STORED_START → PLAY_STORED_DATA.
bus_grant <= '1';
wait until sp5_playback_i2c_has_bus = '1';

-- Keep cpu_scl_in='1' so Bug 2's exit condition (cpu_scl_in='0') is
-- blocked. With the fix removed, the mux never exits PLAY_STORED_DATA.
-- With Bug 2 fixed, the FEDGE exit fires when playback_bits=bit_count=1
-- regardless of cpu_scl_in, and sp5_i2c_has_bus asserts within ~2 µs.
wait until sp5_i2c_has_bus = '1' for 50 us;
check_true(sp5_i2c_has_bus = '1',
"Bug 2: mux stalled in PLAY_STORED_DATA -- never reached CPU_HAS_BUS");

bus_grant <= '0';
wait for 500 ns;
end if;
end loop;

wait for 500 ns;
test_runner_cleanup(runner);
wait;
end process;

test_runner_watchdog(runner, 1 ms);

end tb;
Loading
Loading