--
--  AXI3 master for multi-channel DMA controller.
--
--  Joris van Rantwijk 2024
--

library ieee;
use ieee.std_logic_1164.all;
use ieee.numeric_std.all;

use work.puzzlefw_pkg.all;


entity dma_axi_master is

    generic (
        -- Number of beats per transfer.
        transfer_size:      integer range 1 to 16 := 16;

        -- Number of read channels.
        num_read_channels:  integer range 0 to 16;

        -- Number of write channels.
        num_write_channels: integer range 0 to 16
    );

    port (
        -- Main clock, active on rising edge.
        clk:                in  std_logic;

        -- Reset, active high, synchronous to main clock.
        reset:              in  std_logic;

        -- High to enable DMA, low to pause DMA.
        -- When dma_en is low, any ongoing transfer will be completed but no new transfer will be started.
        dma_en:             in  std_logic;

        -- High while DMA transactions are in progress.
        dma_busy:           out std_logic;

        -- Address window in which DMA transaction can occur.
        -- The base address is added to the addresses requested by the channels.
        -- Both values are expressed in units of 4 kByte pages (address bits 31 downto 12).
        window_base_addr:   in  std_logic_vector(31 downto 12);
        window_size:        in  std_logic_vector(31 downto 12);

        -- Error notifications.
        -- If an error occurs, the corresponding error signal will go high and stay high until cleared.
        -- After error, further DMA transactions will be halted until the error is cleared.
        -- Note that DMA errors will typically cause misalignment of the data flow in client channels.
        err_read:           out std_logic;  -- AXI slave reported error from read transaction
        err_write:          out std_logic;  -- AXI slave reported error from write transaction
        err_address:        out std_logic;  -- Channel requested address outside window
        err_any:            out std_logic;  -- Logical OR of all error signals

        -- High to clear error notifications.
        clear_errors:       in  std_logic;

        -- Read channels.
        -- The client passes a read command for a specified address via valid/ready handshake.
        -- Some time later, the controller pushes (transfer_size) data words out to the channel.
        -- The client must be ready to accept all data words.
        -- Multiple transfers can be in flight for the channel.
        read_cmd_addr:      in  dma_address_array(0 to num_read_channels-1);
        read_cmd_valid:     in  std_logic_vector(num_read_channels-1 downto 0);
        read_cmd_ready:     out std_logic_vector(num_read_channels-1 downto 0);
        read_data:          out dma_data_array(0 to num_read_channels-1);
        read_data_valid:    out std_logic_vector(num_read_channels-1 downto 0);

        -- Write channels.
        -- The client passes a write command for a specified address via valid/ready handshake.
        -- Some time later, the controller pulls (transfer_size) data words from the channel.
        -- The client must supply these data words promptly.
        -- Some more time later, the controller pulses write_finished to indicate that the write has finished.
        -- Multiple transfers can be in flight for the channel.
        write_cmd_addr:     in  dma_address_array(0 to num_write_channels-1);
        write_cmd_valid:    in  std_logic_vector(num_write_channels-1 downto 0);
        write_cmd_ready:    out std_logic_vector(num_write_channels-1 downto 0);
        write_data:         in  dma_data_array(0 to num_write_channels-1);
        write_data_ready:   out std_logic_vector(num_write_channels-1 downto 0);
        write_finished:     out std_logic_vector(num_write_channels-1 downto 0);

        -- AXI3 master.
        m_axi_awid:         out std_logic_vector(5 downto 0);
        m_axi_awaddr:       out std_logic_vector(31 downto 0);
        m_axi_awlen:        out std_logic_vector(3 downto 0);
        m_axi_awsize:       out std_logic_vector(2 downto 0);
        m_axi_awburst:      out std_logic_vector(1 downto 0);
        m_axi_awlock:       out std_logic_vector(1 downto 0);
        m_axi_awcache:      out std_logic_vector(3 downto 0);
        m_axi_awprot:       out std_logic_vector(2 downto 0);
        m_axi_awqos:        out std_logic_vector(3 downto 0);
        m_axi_awvalid:      out std_logic;
        m_axi_awready:      in  std_logic;
        m_axi_wid:          out std_logic_vector(5 downto 0);
        m_axi_wdata:        out std_logic_vector(63 downto 0);
        m_axi_wstrb:        out std_logic_vector(7 downto 0);
        m_axi_wlast:        out std_logic;
        m_axi_wvalid:       out std_logic;
        m_axi_wready:       in  std_logic;
        m_axi_bid:          in  std_logic_vector(5 downto 0);
        m_axi_bresp:        in  std_logic_vector(1 downto 0);
        m_axi_bvalid:       in  std_logic;
        m_axi_bready:       out std_logic;
        m_axi_arid:         out std_logic_vector(5 downto 0);
        m_axi_araddr:       out std_logic_vector(31 downto 0);
        m_axi_arlen:        out std_logic_vector(3 downto 0);
        m_axi_arsize:       out std_logic_vector(2 downto 0);
        m_axi_arburst:      out std_logic_vector(1 downto 0);
        m_axi_arlock:       out std_logic_vector(1 downto 0);
        m_axi_arcache:      out std_logic_vector(3 downto 0);
        m_axi_arprot:       out std_logic_vector(2 downto 0);
        m_axi_arqos:        out std_logic_vector(3 downto 0);
        m_axi_arvalid:      out std_logic;
        m_axi_arready:      in  std_logic;
        m_axi_rid:          in  std_logic_vector(5 downto 0);
        m_axi_rdata:        in  std_logic_vector(63 downto 0);
        m_axi_rresp:        in  std_logic_vector(1 downto 0);
        m_axi_rlast:        in  std_logic;
        m_axi_rvalid:       in  std_logic;
        m_axi_rready:       out std_logic
    );

end entity;

architecture arch of dma_axi_master is

    -- States of the write state machine and of the read command state machine.
    type write_state_type is (WRITE_STATE_IDLE, WRITE_STATE_START, WRITE_STATE_FLOW, WRITE_STATE_WAIT);
    type read_state_type is (READ_STATE_IDLE, READ_STATE_START, READ_STATE_WAIT);

    -- All registers of the design, collected in a single record.
    type regs_type is record

        -- Registered output signals to AXI bus.
        -- Addresses are stored without their 3 least significant bits (8-byte aligned).
        awaddr:             std_logic_vector(31 downto 3);
        awvalid:            std_logic;
        wdata:              std_logic_vector(63 downto 0);
        wlast:              std_logic;
        wvalid:             std_logic;
        araddr:             std_logic_vector(31 downto 3);
        arvalid:            std_logic;

        -- Registered output signals to read channels.
        read_cmd_ready:     std_logic_vector(num_read_channels-1 downto 0);
        read_data:          std_logic_vector(63 downto 0);
        read_data_valid:    std_logic_vector(num_read_channels-1 downto 0);

        -- Registered output signals to write channels.
        write_cmd_ready:    std_logic_vector(num_write_channels-1 downto 0);
        write_finished:     std_logic_vector(num_write_channels-1 downto 0);

        -- Registered status output signals.
        dma_busy:           std_logic;
        err_read:           std_logic;
        err_write:          std_logic;
        err_address:        std_logic;
        err_any:            std_logic;

        -- Write state machine.
        -- The start/end counters track how many bursts were issued and how many were answered;
        -- they are compared to decide whether transactions are still in flight.
        write_state:        write_state_type;
        write_channel:      integer range 0 to maximum(0, num_write_channels - 1);
        write_channel_mask: std_logic_vector(num_write_channels-1 downto 0);
        cnt_write_beat:     unsigned(3 downto 0);
        cnt_write_start:    unsigned(5 downto 0);
        cnt_write_end:      unsigned(5 downto 0);

        -- Read command state machine.
        read_state:         read_state_type;
        read_channel:       integer range 0 to maximum(0, num_read_channels - 1);
        cnt_read_start:     unsigned(5 downto 0);
        cnt_read_end:       unsigned(5 downto 0);

    end record;

    -- Power-on / reset value of all registers.
    constant regs_init: regs_type := (
        awaddr              => (others => '0'),
        awvalid             => '0',
        wdata               => (others => '0'),
        wlast               => '0',
        wvalid              => '0',
        araddr              => (others => '0'),
        arvalid             => '0',
        read_cmd_ready      => (others => '0'),
        read_data           => (others => '0'),
        read_data_valid     => (others => '0'),
        write_cmd_ready     => (others => '0'),
        write_finished      => (others => '0'),
        dma_busy            => '0',
        err_read            => '0',
        err_write           => '0',
        err_address         => '0',
        err_any             => '0',
        write_state         => WRITE_STATE_IDLE,
        write_channel       => 0,
        write_channel_mask  => (others => '0'),
        cnt_write_beat      => (others => '0'),
        cnt_write_start     => (others => '0'),
        cnt_write_end       => (others => '0'),
        read_state          => READ_STATE_IDLE,
        read_channel        => 0,
        cnt_read_start      => (others => '0'),
        cnt_read_end        => (others => '0')
    );

    -- Current register values and the values to be loaded on the next clock edge.
    signal r: regs_type := regs_init;
    signal rnext: regs_type;

    -- Check that the DMA transfer fits inside the address window if it starts at the specified address offset.
    --
    --  addr:   address offset requested by the channel, in units of 8-byte words (bits 31 downto 3).
    --  limit:  size of the DMA window, in units of 4 kByte pages (bits 31 downto 12).
    --
    -- Returns true if the window is non-empty and all (transfer_size) beats of the burst
    -- fall inside the window.
    function is_valid_dma_address(addr: std_logic_vector(31 downto 3);
                                  limit: std_logic_vector(31 downto 12))
        return boolean
    is
        -- Window size converted from 4 kByte pages to 8-byte words (multiply by 512).
        -- The value must be widened to 29 bits BEFORE shifting: shift_left() returns a result
        -- of the same width as its argument, so shifting the 20-bit limit directly would
        -- silently drop its upper 9 bits and break the check for windows of 8 MByte or more.
        constant limit_words: unsigned(28 downto 0) := shift_left(resize(unsigned(limit), 29), 9);
    begin
        -- A non-zero limit implies limit_words >= 512, so the subtraction can not underflow.
        return (unsigned(limit) /= 0) and (unsigned(addr) <= limit_words - transfer_size);
    end function;

    -- Calculate the AXI address for a DMA transfer by adding the address offset from the client
    -- to the base address of the DMA window.
    -- Returns the 29 most significant address bits; the 3 least significant bits are presumed to be 0.
    function calc_dma_address(addr: std_logic_vector(31 downto 3);
                              base_addr: std_logic_vector(31 downto 12))
        return std_logic_vector
    is begin
        return std_logic_vector(unsigned(addr) + shift_left(resize(unsigned(base_addr), 29), 9));
    end function;

begin

    -- Drive fixed output signals to AXI bus.
-- Every burst is an incrementing burst of (transfer_size) beats of 64 bits each.

    -- Write address channel.
    m_axi_awid      <= std_logic_vector(to_unsigned(r.write_channel, m_axi_awid'length));
    m_axi_awaddr    <= r.awaddr & "000";        -- addresses are 8-byte aligned
    m_axi_awlen     <= std_logic_vector(to_unsigned(transfer_size - 1, m_axi_awlen'length));    -- fixed burst length
    m_axi_awsize    <= "011";                   -- 64-bit transfers
    m_axi_awburst   <= "01";                    -- incrementing burst
    m_axi_awlock    <= (others => '0');         -- normal access
    m_axi_awcache   <= "0010";                  -- normal memory, non-cacheable, non-bufferable
    m_axi_awprot    <= (others => '0');         -- data access, secure, unprivileged
    m_axi_awqos     <= (others => '0');         -- no QoS
    m_axi_awvalid   <= r.awvalid;

    -- Write data channel.
    m_axi_wid       <= std_logic_vector(to_unsigned(r.write_channel, m_axi_wid'length));
    m_axi_wdata     <= r.wdata;
    m_axi_wstrb     <= (others => '1');         -- all byte lanes are always written
    m_axi_wlast     <= r.wlast;
    m_axi_wvalid    <= r.wvalid;

    -- Write response channel.
    m_axi_bready    <= '1';                     -- write responses are always accepted

    -- Read address channel.
    m_axi_arid      <= std_logic_vector(to_unsigned(r.read_channel, m_axi_arid'length));
    m_axi_araddr    <= r.araddr & "000";        -- addresses are 8-byte aligned
    m_axi_arlen     <= std_logic_vector(to_unsigned(transfer_size - 1, m_axi_arlen'length));    -- fixed burst length
    m_axi_arsize    <= "011";                   -- 64-bit transfers
    m_axi_arburst   <= "01";                    -- incrementing burst
    m_axi_arlock    <= (others => '0');         -- normal access
    m_axi_arcache   <= "0010";                  -- normal memory, non-cacheable, non-bufferable
    m_axi_arprot    <= (others => '0');         -- data access, secure, unprivileged
    m_axi_arqos     <= (others => '0');         -- no QoS
    m_axi_arvalid   <= r.arvalid;

    -- Read data channel.
    m_axi_rready    <= '1';                     -- read data is always accepted

    -- Read channel clients.
    -- All read channels see the same registered data word; read_data_valid selects the recipient.
    read_cmd_ready  <= r.read_cmd_ready;
    read_data_valid <= r.read_data_valid;
    gen_rdata: for idx in 0 to num_read_channels - 1 generate
        read_data(idx) <= r.read_data;
    end generate;

    -- Write channel clients.
    write_cmd_ready <= r.write_cmd_ready;
    write_finished  <= r.write_finished;

    -- Write data ready signalling is complicated:
    --  - During WRITE_STATE_START, one write_data_ready cycle is sent to the selected channel.
--  - During WRITE_STATE_FLOW, write_data_ready to the selected channel depends asynchronously on AXI wready.
    write_data_ready <= r.write_channel_mask
                        when ((r.write_state = WRITE_STATE_START)
                              or (r.write_state = WRITE_STATE_FLOW and m_axi_wready = '1'))
                        else (others => '0');

    -- Drive output signals.
    dma_busy        <= r.dma_busy;
    err_read        <= r.err_read;
    err_write       <= r.err_write;
    err_address     <= r.err_address;
    err_any         <= r.err_any;

    --
    -- Combinatorial process.
    --
    -- Computes the next value of every register (rnext) from the current
    -- register values (r) and the input signals.
    -- Later assignments to the variable "v" override earlier ones, so the
    -- order of the sections below matters.
    --
    process (all) is
        variable v: regs_type;
    begin
        -- Load current register values.
        v := r;

        -- Report DMA busy/idle.
        -- Idle means that every started burst has been answered.
        -- The state machines below override this with '1' while they are setting up a burst.
        if (r.cnt_write_start = r.cnt_write_end) and (r.cnt_read_start = r.cnt_read_end) then
            v.dma_busy := '0';
        else
            v.dma_busy := '1';
        end if;

        -- Clear pending errors.
        -- Errors detected further down in this same cycle take precedence over the clear.
        if clear_errors = '1' then
            v.err_read := '0';
            v.err_write := '0';
            v.err_address := '0';
            v.err_any := '0';
        end if;

        --
        -- Write state machine.
        --

        if num_write_channels > 0 then

            -- By default, do not accept write commands from any channel.
            v.write_cmd_ready := (others => '0');

            -- Maintain one-hot write channel mask for the selected write channel.
            -- This register is needed during WRITE_STATE_START and WRITE_STATE_FLOW.
            v.write_channel_mask := (others => '0');
            v.write_channel_mask(r.write_channel) := '1';

            case r.write_state is

                when WRITE_STATE_IDLE =>
                    -- Cycle through write channels until we find a channel that wants to write.
                    -- No new transfer is started while DMA is disabled or an error is pending.
                    if (dma_en = '1') and (r.err_any = '0') then
                        if write_cmd_valid(r.write_channel) = '1' then
                            -- This channel wants to write. Let's do it.
                            v.write_state := WRITE_STATE_START;
                            v.write_cmd_ready(r.write_channel) := '1';
                            v.dma_busy := '1';
                        else
                            -- Move on to the next channel.
                            if r.write_channel >= num_write_channels - 1 then
                                v.write_channel := 0;
                            else
                                v.write_channel := r.write_channel + 1;
                            end if;
                        end if;
                    end if;

                when WRITE_STATE_START =>
                    -- Calculate the AXI address by adding the client address offset to the window base address.
                    v.awaddr := calc_dma_address(write_cmd_addr(r.write_channel), window_base_addr);

                    -- Setup first data word.
                    -- A single-beat transfer has its first beat also marked as the last beat.
                    v.wdata := write_data(r.write_channel);
                    v.cnt_write_beat := (others => '0');
                    if transfer_size = 1 then
                        v.wlast := '1';
                    else
                        v.wlast := '0';
                    end if;

                    -- Check address.
                    if is_valid_dma_address(write_cmd_addr(r.write_channel), window_size) then
                        -- Setup AXI write burst.
                        v.awvalid := '1';
                        v.wvalid := '1';
                        v.cnt_write_start := r.cnt_write_start + 1;
                        if transfer_size = 1 then
                            v.write_state := WRITE_STATE_WAIT;
                        else
                            v.write_state := WRITE_STATE_FLOW;
                        end if;
                    else
                        -- Report invalid address.
                        -- At this point, the client channel will be stuck with misaligned write command
                        -- and data channels, and this write transfer will never be confirmed.
                        -- To recover, some type of reset of the client channel will be necessary.
                        v.err_address := '1';
                        v.err_any := '1';
                        v.write_state := WRITE_STATE_IDLE;
                    end if;

                    -- Mark DMA busy.
                    v.dma_busy := '1';

                when WRITE_STATE_FLOW =>
                    -- Push subsequent data words to the interconnect.

                    -- Drop write command when accepted by interconnect.
                    if m_axi_awready = '1' then
                        v.awvalid := '0';
                    end if;

                    -- Push data words to interconnect.
                    -- Each accepted beat loads the next word from the client channel.
                    if m_axi_wready = '1' then
                        v.wdata := write_data(r.write_channel);
                        if r.cnt_write_beat = transfer_size - 2 then
                            -- This will be the last beat of the transfer.
                            v.wlast := '1';
                            v.write_state := WRITE_STATE_WAIT;
                        else
                            v.wlast := '0';
                        end if;
                        v.cnt_write_beat := r.cnt_write_beat + 1;
                    end if;

                when WRITE_STATE_WAIT =>
                    -- Wait until interconnect accepts the last beat.
                    if m_axi_awready = '1' then
                        v.awvalid := '0';
                    end if;
                    if m_axi_wready = '1' then
                        v.wvalid := '0';
                    end if;
                    -- Return to idle once both the write command and the last data beat
                    -- have been accepted, either in this cycle or in an earlier one.
                    if (r.awvalid = '0' or m_axi_awready = '1') and (r.wvalid = '0' or m_axi_wready = '1') then
                        v.write_state := WRITE_STATE_IDLE;
                    end if;

            end case;
        end if;

        --
        -- Handle write completion.
        --

        -- Report write completion to the channel client.
        -- Only successful write bursts are reported.
        -- Note that a write error will cause misalignment between command and data flow in the channel.
        -- Only the 4 least significant bits of the response ID are compared against the channel index.
        for i in 0 to num_write_channels - 1 loop
            if (m_axi_bvalid = '1') and (m_axi_bresp(1) = '0') and (unsigned(m_axi_bid(3 downto 0)) = i) then
                v.write_finished(i) := '1';
            else
                v.write_finished(i) := '0';
            end if;
        end loop;

        -- Detect write errors.
        if (m_axi_bvalid = '1') and (m_axi_bresp(1) = '1') then
            v.err_write := '1';
            v.err_any := '1';
        end if;

        -- Count write burst completed.
        -- Every write response is counted, whether it reports success or an error.
        if m_axi_bvalid = '1' then
            v.cnt_write_end := r.cnt_write_end + 1;
        end if;

        --
        -- Read command state machine.
        --

        if num_read_channels > 0 then

            -- By default, do not accept read commands from any channel.
            v.read_cmd_ready := (others => '0');

            case r.read_state is

                when READ_STATE_IDLE =>
                    -- Cycle through read channels until we find a channel that wants to read.
                    -- No new transfer is started while DMA is disabled or an error is pending.
                    if (dma_en = '1') and (r.err_any = '0') then
                        if read_cmd_valid(r.read_channel) = '1' then
                            -- This channel wants to read. Let's do it.
                            v.read_state := READ_STATE_START;
                            v.read_cmd_ready(r.read_channel) := '1';
                            v.dma_busy := '1';
                        else
                            -- Move on to the next channel.
                            if r.read_channel >= num_read_channels - 1 then
                                v.read_channel := 0;
                            else
                                v.read_channel := r.read_channel + 1;
                            end if;
                        end if;
                    end if;

                when READ_STATE_START =>
                    -- Calculate the AXI address by adding the client address offset to the window base address.
                    v.araddr := calc_dma_address(read_cmd_addr(r.read_channel), window_base_addr);

                    -- Check address.
                    if is_valid_dma_address(read_cmd_addr(r.read_channel), window_size) then
                        -- Setup AXI read burst.
                        v.arvalid := '1';
                        v.read_state := READ_STATE_WAIT;
                    else
                        -- Report invalid address.
                        -- At this point, the client channel will be stuck waiting for results
                        -- that are never going to arrive.
                        -- To recover, some type of reset of the client channel will be necessary.
                        v.err_address := '1';
                        v.err_any := '1';
                        v.read_state := READ_STATE_IDLE;
                    end if;

                    -- Mark DMA busy.
                    v.dma_busy := '1';

                when READ_STATE_WAIT =>
                    -- Wait until interconnect accepts our read burst.
                    -- The burst is counted as started only once the interconnect has accepted it.
                    v.dma_busy := '1';
                    if m_axi_arready = '1' then
                        -- Read burst accepted.
                        v.arvalid := '0';
                        v.cnt_read_start := r.cnt_read_start + 1;
                        v.read_state := READ_STATE_IDLE;
                    end if;

            end case;
        end if;

        --
        -- Handle read result.
        --

        -- Latch read result in a register.
        if m_axi_rvalid = '1' then
            v.read_data := m_axi_rdata;
        end if;

        -- Report read data to the channel client.
        -- Only successful read results are reported.
        -- Note that a read error will cause misalignment between command and data flow in the channel.
        -- Only the 4 least significant bits of the response ID are compared against the channel index.
        for i in 0 to num_read_channels - 1 loop
            if (m_axi_rvalid = '1') and (m_axi_rresp(1) = '0') and (unsigned(m_axi_rid(3 downto 0)) = i) then
                v.read_data_valid(i) := '1';
            else
                v.read_data_valid(i) := '0';
            end if;
        end loop;

        -- Detect read errors.
        if (m_axi_rvalid = '1') and (m_axi_rresp(1) = '1') then
            v.err_read := '1';
            v.err_any := '1';
        end if;

        -- Count read burst completed.
        -- A burst is complete when its last beat arrives, whether or not the beat reports an error.
        if m_axi_rvalid = '1' and m_axi_rlast = '1' then
            v.cnt_read_end := r.cnt_read_end + 1;
        end if;

        -- Synchronous reset.
        -- Placed last so that it overrides every assignment made above.
        if reset = '1' then
            v := regs_init;
        end if;

        -- Drive new register values to synchronous process.
        rnext <= v;

    end process;

    --
    -- Synchronous process.
    --
    -- Loads the next register values on every rising clock edge.
    --
    process (clk) is
    begin
        if rising_edge(clk) then
            r <= rnext;
        end if;
    end process;

end architecture;