litedram: Add an L2 cache with store queue

This adds a cache between the wishbone and litedram with the following
features (at this point, it's still evolving):

  - 128 bytes line width in order to have a reasonable amount of
litedram pipelining on the 128-bit wide data port.

  - Configurable geometry otherwise

  - Stores are acked immediately on wishbone whether hit or miss
(minus a 2-cycle delay if there's a previous load response in the
way) and sent to LiteDRAM via an 8-entry (configurable) store queue

@ -50,7 +50,7 @@ core_files = decode_types.vhdl common.vhdl wishbone_types.vhdl fetch1.vhdl \
loadstore1.vhdl mmu.vhdl dcache.vhdl writeback.vhdl core_debug.vhdl \
core.vhdl
core.vhdl core.vhdl

soc_files = wishbone_arbiter.vhdl wishbone_bram_wrapper.vhdl sync_fifo.vhdl \
wishbone_debug_master.vhdl xics.vhdl syscon.vhdl soc.vhdl
wishbone_debug_master.vhdl xics.vhdl syscon.vhdl soc.vhdl

soc_sim_files = sim_console.vhdl sim_uart.vhdl sim_bram_helpers.vhdl \

Signals in entities :
$ version 1.1

# Signals in entities :

library work;

library work; library work;
use work.wishbone_types.all; use work.wishbone_types.all;
use work.utils.all;
use work.helpers.all;

entity litedram_wrapper is entity litedram_wrapper is
generic ( generic (
DRAM_ABITS : positive; DRAM_ABITS : positive;
DRAM_ALINES : positive; DRAM_ALINES : positive;

-- Pseudo-ROM payload -- Pseudo-ROM payload
PAYLOAD_SIZE : natural; PAYLOAD_SIZE : natural;
PAYLOAD_FILE : string; PAYLOAD_FILE : string;

-- L2 cache --

-- Line size in bytes
LINE_SIZE : positive := 128;
-- Number of lines in a set
NUM_LINES : positive := 32;
-- Number of ways
NUM_WAYS : positive := 4;
-- Max number of stores in the queue
STOREQ_DEPTH : positive := 8;
-- Don't send loads until all pending stores acked in litedram
NO_LS_OVERLAP : boolean := false;

-- Debug -- Debug
LITEDRAM_TRACE : boolean := false LITEDRAM_TRACE : boolean := false;
TRACE : boolean := false
); );
port( port(
-- LiteDRAM generates the system clock and reset -- LiteDRAM generates the system clock and reset
-- LiteDRAM generates the system clock and reset
signal user_port0_rdata_ready : std_ulogic; signal user_port0_rdata_ready : std_ulogic;
signal user_port0_rdata_data : std_ulogic_vector(127 downto 0); signal user_port0_rdata_data : std_ulogic_vector(127 downto 0);

signal ad3 : std_ulogic;

signal wb_ctrl_adr : std_ulogic_vector(29 downto 0); signal wb_ctrl_adr : std_ulogic_vector(29 downto 0);
signal wb_ctrl_dat_w : std_ulogic_vector(31 downto 0); signal wb_ctrl_dat_w : std_ulogic_vector(31 downto 0);
signal wb_ctrl_dat_r : std_ulogic_vector(31 downto 0); signal wb_ctrl_dat_r : std_ulogic_vector(31 downto 0);
signal wb_ctrl_sel : std_ulogic_vector(3 downto 0); signal wb_ctrl_sel : std_ulogic_vector(3 downto 0);
signal wb_ctrl_cyc : std_ulogic; signal wb_ctrl_cyc : std_ulogic := '0';
signal wb_ctrl_stb : std_ulogic; signal wb_ctrl_stb : std_ulogic;
signal wb_ctrl_ack : std_ulogic; signal wb_ctrl_ack : std_ulogic;
signal wb_ctrl_we : std_ulogic; signal wb_ctrl_we : std_ulogic;
@ -137,11 +153,239 @@ architecture behaviour of litedram_wrapper is
signal wb_init_in : wb_io_master_out; signal wb_init_in : wb_io_master_out;
signal wb_init_out : wb_io_slave_out; signal wb_init_out : wb_io_slave_out;

type state_t is (CMD, MWRITE, MREAD); -- DRAM data port width
constant DRAM_DBITS : natural := 128;
constant DRAM_SBITS : natural := (DRAM_DBITS / 8);

-- BRAM organisation: We never access more than DRAM_DBITS at
-- a time so to save resources we make the array only that wide, and
-- use consecutive indices to make a cache "line"
-- ROW_SIZE is the width in bytes of the BRAM (based on litedram, so 128-bits)
constant ROW_SIZE : natural := DRAM_DBITS / 8;
-- ROW_PER_LINE is the number of row (litedram transactions) in a line
constant ROW_PER_LINE : natural := LINE_SIZE / ROW_SIZE;
-- BRAM_ROWS is the number of rows in BRAM needed to represent one way
-- of the cache
constant BRAM_ROWS : natural := NUM_LINES * ROW_PER_LINE;

-- Bit fields counts in the address

-- ROW_BITS is the number of bits to select a row
constant ROW_BITS : natural := log2(BRAM_ROWS);
-- ROW_LINEBITS is the number of bits to select a row within a line
constant ROW_LINEBITS : natural := log2(ROW_PER_LINE);
-- LINE_OFF_BITS is the number of bits for the offset in a cache line
constant LINE_OFF_BITS : natural := log2(LINE_SIZE);
-- ROW_OFF_BITS is the number of bits for the offset in a row
constant ROW_OFF_BITS : natural := log2(ROW_SIZE);
-- REAL_ADDR_BITS is the number of real address bits that we store
constant REAL_ADDR_BITS : positive := DRAM_ABITS + ROW_OFF_BITS;
-- INDEX_BITS is the number of bits to select a cache line
constant INDEX_BITS : natural := log2(NUM_LINES);
-- SET_SIZE_BITS is the log base 2 of the set size
constant SET_SIZE_BITS : natural := LINE_OFF_BITS + INDEX_BITS;
-- TAG_BITS is the number of bits of the tag part of the address
constant TAG_BITS : natural := REAL_ADDR_BITS - SET_SIZE_BITS;
-- WAY_BITS is the number of bits to select a way
constant WAY_BITS : natural := log2(NUM_WAYS);

subtype row_t is integer range 0 to BRAM_ROWS-1;
subtype index_t is integer range 0 to NUM_LINES-1;
subtype way_t is integer range 0 to NUM_WAYS-1;

-- The cache data BRAM organized as described above for each way
subtype cache_row_t is std_ulogic_vector(DRAM_DBITS-1 downto 0);

-- The cache tags LUTRAM has a row per set. Vivado is a pain and will
-- not handle a clean (commented) definition of the cache tags as a 3d
-- memory. For now, work around it by putting all the tags of a set
-- into a single wide vector (TAG_BITS * NUM_WAYS bits).
subtype cache_tag_t is std_logic_vector(TAG_BITS-1 downto 0);
-- type cache_tags_set_t is array(way_t) of cache_tag_t;
-- type cache_tags_array_t is array(index_t) of cache_tags_set_t;
constant TAG_RAM_WIDTH : natural := TAG_BITS * NUM_WAYS;
subtype cache_tags_set_t is std_logic_vector(TAG_RAM_WIDTH-1 downto 0);
type cache_tags_array_t is array(index_t) of cache_tags_set_t;

-- The cache valid bits
subtype cache_way_valids_t is std_ulogic_vector(NUM_WAYS-1 downto 0);
type cache_valids_t is array(index_t) of cache_way_valids_t;

-- Storage. Hopefully "cache_rows" is a BRAM, the rest is LUTs
signal cache_tags : cache_tags_array_t;
signal cache_valids : cache_valids_t;

attribute ram_style : string;
attribute ram_style of cache_tags : signal is "distributed";

-- Store queue signals
-- We store a single wishbone dword per entry (64-bit) but all
-- 16 sel bits for the DRAM.
-- XXX Investigate storing only AD3 and 8 sel bits if it's better
constant STOREQ_BITS : positive := wishbone_data_bits + DRAM_SBITS;

signal storeq_rd_ready : std_ulogic;
signal storeq_rd_valid : std_ulogic;
signal storeq_rd_data : std_ulogic_vector(STOREQ_BITS-1 downto 0);
signal storeq_wr_ready : std_ulogic;
signal storeq_wr_valid : std_ulogic;
signal storeq_wr_data : std_ulogic_vector(STOREQ_BITS-1 downto 0);

-- Cache management signals

-- Cache state machine
type state_t is (IDLE, -- Normal load hit processing
REFILL_WAIT_ACK); -- Cache refill wait ack
signal state : state_t; signal state : state_t;

-- Latched WB request.
signal wb_req : wishbone_master_out := wishbone_master_out_init;

-- Read pipeline (to handle cache RAM latency)
signal read_ack_0 : std_ulogic;
signal read_ack_1 : std_ulogic;
signal read_ad3_0 : std_ulogic;
signal read_ad3_1 : std_ulogic;
signal read_way_0 : way_t;
signal read_way_1 : way_t;

-- Async signals decoding latched request
type req_op_t is (OP_NONE,

signal req_index : index_t;
signal req_row : row_t;
signal req_hit_way : way_t;
signal req_tag : cache_tag_t;
signal req_op : req_op_t;
signal req_laddr : std_ulogic_vector(REAL_ADDR_BITS-1 downto 0);
signal req_ad3 : std_ulogic;
signal req_we : std_ulogic_vector(DRAM_SBITS-1 downto 0);
signal req_wdata : std_ulogic_vector(DRAM_DBITS-1 downto 0);
signal accept_store : std_ulogic;

-- Line refill command signals and latches
signal refill_cmd_valid : std_ulogic;
signal refill_cmd_addr : std_ulogic_vector(DRAM_ABITS-1 downto 0);
signal refill_way : way_t;
signal refill_index : index_t;
signal refill_row : row_t;

-- Cache RAM interface
type cache_ram_out_t is array(way_t) of cache_row_t;
signal cache_out : cache_ram_out_t;

-- PLRU output interface
type plru_out_t is array(index_t) of std_ulogic_vector(WAY_BITS-1 downto 0);
signal plru_victim : plru_out_t;

-- Helper functions to decode incoming requests

-- Return the cache line index (tag index) for an address
function get_index(addr: wishbone_addr_type) return index_t is
return to_integer(unsigned(addr(SET_SIZE_BITS - 1 downto LINE_OFF_BITS)));

-- Return the cache row index (data memory) for an address
function get_row(addr: std_ulogic_vector(REAL_ADDR_BITS-1 downto 0)) return row_t is
return to_integer(unsigned(addr(SET_SIZE_BITS - 1 downto ROW_OFF_BITS)));

-- Returns whether this is the last row of a line. It takes a DRAM address
function is_last_row_addr(addr: std_ulogic_vector(REAL_ADDR_BITS-1 downto ROW_OFF_BITS))
return boolean is
constant ones : std_ulogic_vector(ROW_LINEBITS-1 downto 0) := (others => '1');
return addr(LINE_OFF_BITS-1 downto ROW_OFF_BITS) = ones;

-- Returns whether this is the last row of a line
function is_last_row(row: row_t) return boolean is
variable row_v : std_ulogic_vector(ROW_BITS-1 downto 0);
constant ones : std_ulogic_vector(ROW_LINEBITS-1 downto 0) := (others => '1');
row_v := std_ulogic_vector(to_unsigned(row, ROW_BITS));
return row_v(ROW_LINEBITS-1 downto 0) = ones;

-- Return the address of the next row in the current cache line. It takes a
-- DRAM address
function next_row_addr(addr: std_ulogic_vector(REAL_ADDR_BITS-1 downto ROW_OFF_BITS))
return std_ulogic_vector is
variable row_idx : std_ulogic_vector(ROW_LINEBITS-1 downto 0);
variable result : std_ulogic_vector(REAL_ADDR_BITS-1 downto ROW_OFF_BITS);
-- Is there no simpler way in VHDL to generate that 3 bits adder ?
row_idx := addr(LINE_OFF_BITS-1 downto ROW_OFF_BITS);
row_idx := std_ulogic_vector(unsigned(row_idx) + 1);
result := addr;
result(LINE_OFF_BITS-1 downto ROW_OFF_BITS) := row_idx;
return result;

-- Return the next row in the current cache line. We use a dedicated
-- function in order to limit the size of the generated adder to be
-- only the bits within a cache line (3 bits with default settings)
function next_row(row: row_t) return row_t is
variable row_v : std_ulogic_vector(ROW_BITS-1 downto 0);
variable row_idx : std_ulogic_vector(ROW_LINEBITS-1 downto 0);
variable result : std_ulogic_vector(ROW_BITS-1 downto 0);
row_v := std_ulogic_vector(to_unsigned(row, ROW_BITS));
row_idx := row_v(ROW_LINEBITS-1 downto 0);
row_v(ROW_LINEBITS-1 downto 0) := std_ulogic_vector(unsigned(row_idx) + 1);
return to_integer(unsigned(row_v));

-- Get the tag value from the address
function get_tag(addr: wishbone_addr_type) return cache_tag_t is
return addr(REAL_ADDR_BITS - 1 downto SET_SIZE_BITS);

-- Read a tag from a tag memory row
function read_tag(way: way_t; tagset: cache_tags_set_t) return cache_tag_t is
return tagset((way+1) * TAG_BITS - 1 downto way * TAG_BITS);

-- Write a tag to tag memory row
procedure write_tag(way: in way_t; tagset: inout cache_tags_set_t;
tag: cache_tag_t) is
tagset((way+1) * TAG_BITS - 1 downto way * TAG_BITS) := tag;

begin begin

-- Sanity checks
assert LINE_SIZE mod ROW_SIZE = 0 report "LINE_SIZE not multiple of ROW_SIZE" severity FAILURE;
assert ispow2(LINE_SIZE) report "LINE_SIZE not power of 2" severity FAILURE;
assert ispow2(NUM_LINES) report "NUM_LINES not power of 2" severity FAILURE;
assert ispow2(ROW_PER_LINE) report "ROW_PER_LINE not power of 2" severity FAILURE;
report "geometry bits don't add up" severity FAILURE;
report "geometry bits don't add up" severity FAILURE;
report "geometry bits don't add up" severity FAILURE;
report "geometry bits don't add up" severity FAILURE;
assert (128 = DRAM_DBITS)
report "Can't yet handle a DRAM width that isn't 128-bits" severity FAILURE;

-- alternate core reset address set when DRAM is not initialized. -- alternate core reset address set when DRAM is not initialized.
core_alt_reset <= not init_done; core_alt_reset <= not init_done;

@ -170,7 +414,15 @@ begin
wb_init_in.stb <= wb_ctrl_in.stb; wb_init_in.stb <= wb_ctrl_in.stb;
wb_init_in.cyc <= wb_ctrl_in.cyc and wb_ctrl_is_init; wb_init_in.cyc <= wb_ctrl_in.cyc and wb_ctrl_is_init;

-- DRAM CSR IN signals -- DRAM CSR IN signals. Extra latch to help with timing
csr_latch: process(system_clk)
if rising_edge(system_clk) then
if system_reset = '1' then
wb_ctrl_cyc <= '0';
wb_ctrl_stb <= '0';
-- XXX Maybe only update addr when cyc = '1' to save power ?
wb_ctrl_adr <= x"0000" & wb_ctrl_in.adr(15 downto 2); wb_ctrl_adr <= x"0000" & wb_ctrl_in.adr(15 downto 2);
wb_ctrl_dat_w <= wb_ctrl_in.dat; wb_ctrl_dat_w <= wb_ctrl_in.dat;
wb_ctrl_sel <= wb_ctrl_in.sel; wb_ctrl_sel <= wb_ctrl_in.sel;
@ -178,7 +430,19 @@ begin
wb_ctrl_cyc <= wb_ctrl_in.cyc and wb_ctrl_is_csr; wb_ctrl_cyc <= wb_ctrl_in.cyc and wb_ctrl_is_csr;
wb_ctrl_stb <= wb_ctrl_in.stb and wb_ctrl_is_csr; wb_ctrl_stb <= wb_ctrl_in.stb and wb_ctrl_is_csr;

-- Ctrl bus wishbone OUT signals -- Clear stb on ack otherwise the memory will latch
-- the write twice which breaks levelling. On the next
-- cycle we will latch an updated stb that takes the
-- ack into account.
if wb_ctrl_ack = '1' then
wb_ctrl_stb <= '0';
end if;
end if;
end if;
end process;

-- Ctrl bus wishbone OUT signals. XXX Consider adding latch on
-- CSR response to help timing
wb_ctrl_out.ack <= wb_ctrl_ack when wb_ctrl_is_csr = '1' wb_ctrl_out.ack <= wb_ctrl_ack when wb_ctrl_is_csr = '1'
else wb_init_out.ack; else wb_init_out.ack;
wb_ctrl_out.dat <= wb_ctrl_dat_r when wb_ctrl_is_csr = '1' wb_ctrl_out.dat <= wb_ctrl_dat_r when wb_ctrl_is_csr = '1'
@ -186,56 +450,531 @@ begin
wb_ctrl_out.stall <= wb_init_out.stall when wb_ctrl_is_init else wb_ctrl_out.stall <= wb_init_out.stall when wb_ctrl_is_init else
'0' when wb_ctrl_in.cyc = '0' else not wb_ctrl_ack; '0' when wb_ctrl_in.cyc = '0' else not wb_ctrl_ack;

-- Generate a cache RAM for each way
rams: for i in 0 to NUM_WAYS-1 generate
signal do_read : std_ulogic;
signal do_write : std_ulogic;
signal rd_addr : std_ulogic_vector(ROW_BITS-1 downto 0);
signal wr_addr : std_ulogic_vector(ROW_BITS-1 downto 0);
signal wr_data : std_ulogic_vector(DRAM_DBITS-1 downto 0);
signal wr_sel : std_ulogic_vector(ROW_SIZE-1 downto 0);
signal wr_sel_m : std_ulogic_vector(ROW_SIZE-1 downto 0);
signal dout : cache_row_t;
way: entity work.cache_ram
generic map (
ADD_BUF => true
port map (
clk => system_clk,
rd_en => do_read,
rd_addr => rd_addr,
rd_data => dout,
wr_sel => wr_sel_m,
wr_addr => wr_addr,
wr_data => wr_data
-- Read port
do_read <= '1';
cache_out(i) <= dout;
rd_addr <= std_ulogic_vector(to_unsigned(req_row, ROW_BITS));

-- Write mux: cache refills from DRAM or writes from Wishbone
if state = IDLE then
-- Write from wishbone
wr_addr <= std_ulogic_vector(to_unsigned(req_row, ROW_BITS));
wr_data <= req_wdata;
wr_sel <= req_we;
-- Refill from DRAM
wr_data <= user_port0_rdata_data;
wr_sel <= (others => '1');
wr_addr <= std_ulogic_vector(to_unsigned(refill_row, ROW_BITS));
end if;

-- Write enable logic
do_write <= '0';
if req_op = OP_STORE_HIT and req_hit_way = i then
do_write <= '1';
elsif user_port0_rdata_valid = '1' and refill_way = i then
do_write <= '1';
end if;

-- Mask write selects with do_write since BRAM doesn't always
-- have a global write-enable (Vivado generates TDP instead
-- of SDP when using one, thus doubling cache BRAM usage).
for i in 0 to ROW_SIZE-1 loop
wr_sel_m(i) <= wr_sel(i) and do_write;
end loop;

if TRACE and rising_edge(system_clk) then
if do_write = '1' then
report "cache write way:" & integer'image(i) &
" addr:" & to_hstring(wr_addr) &
" sel:" & to_hstring(wr_sel_m) &
" data:" & to_hstring(wr_data);
end if;
end if;
end process;
end generate;

-- Generate PLRUs
maybe_plrus: if NUM_WAYS > 1 generate
plrus: for i in 0 to NUM_LINES-1 generate
-- PLRU interface
signal plru_acc : std_ulogic_vector(WAY_BITS-1 downto 0);
signal plru_acc_en : std_ulogic;
signal plru_out : std_ulogic_vector(WAY_BITS-1 downto 0);
plru : entity work.plru
generic map (
port map (
clk => system_clk,
rst => system_reset,
acc => plru_acc,
acc_en => plru_acc_en,
lru => plru_out

process(req_index, req_op, req_hit_way, plru_out)
-- PLRU interface
if (req_op = OP_LOAD_HIT or
req_op = OP_STORE_HIT) and req_index = i then
plru_acc_en <= '1';
plru_acc_en <= '0';
end if;
plru_acc <= std_ulogic_vector(to_unsigned(req_hit_way, WAY_BITS));
plru_victim(i) <= plru_out;
end process;
end generate;
end generate;

-- Wishbone interface:
-- - Incoming wishbone request latch (to help with timing)
-- - Read response pipeline (to match BRAM output buffer delay)
-- - Stall generation
-- XXX TODO: Properly handle cyc drops before all acks are sent...
request_latch: process(system_clk)
if rising_edge(system_clk) then
-- We can latch a new request if we are idle (for now). We also
-- latch the absence of request. This is a pipeline that takes
-- one per-cycle unless non-IDLE.
if wb_out.stall = '0' then
-- Avoid constantly updating addr/data for unrelated requests
if wb_in.cyc = '1' then
wb_req <= wb_in;
wb_req.cyc <= wb_in.cyc;
wb_req.stb <= wb_in.stb;
end if;

if TRACE then
if wb_in.cyc = '1' and wb_in.stb = '1' then
report "latch new wb req ! addr:" & to_hstring(wb_in.adr) &
" we:" & std_ulogic'image(wb_in.we) &
" sel:" & to_hstring(wb_in.sel);
end if;
end if;
end if;
end if;
end process;

-- Read response pipeline
-- XXX Might have to put store acks in there too (see comment in wb_response)
read_pipe: process(system_clk)
if rising_edge(system_clk) then
read_ack_0 <= '1' when req_op = OP_LOAD_HIT else '0';
read_ad3_0 <= req_ad3;
read_way_0 <= req_hit_way;

read_ack_1 <= read_ack_0;
read_ad3_1 <= read_ad3_0;
read_way_1 <= read_way_0;

if TRACE then
if req_op = OP_LOAD_HIT then
report "Load hit addr:" & to_hstring(wb_req.adr) &
" idx:" & integer'image(req_index) &
" tag:" & to_hstring(req_tag) &
" way:" & integer'image(req_hit_way);
elsif req_op = OP_LOAD_MISS then
report "Load miss addr:" & to_hstring(wb_req.adr);
end if;
if read_ack_0 = '1' then
report "read data:" & to_hstring(cache_out(read_way_0));
end if;
end if;
end if;
end process;

wb_reponse: process(all)
variable rdata : std_ulogic_vector(DRAM_DBITS-1 downto 0);
variable store_done : std_ulogic;
-- Can we accept a store ? This is set when IDLE and the store
-- queue & command queue are not full.
-- --
-- Data bus wishbone to LiteDRAM native port -- Note: This is only used to control the WB request latch, stall
-- and store "early complete". We don't want to use this to control
-- cmd_valid to DRAM as this would create a circular dependency inside
-- LiteDRAM as cmd_ready I think is driven from cmd_valid.
-- --
-- Address bit 3 selects the top or bottom half of the data -- The state machine that controls the command queue must thus
-- bus (64-bit wishbone vs. 128-bit DRAM interface) -- reproduce this logic at least partially.
-- --
-- XXX TODO: Figure out how to pipeline this -- Note also that user_port0_cmd_ready from LiteDRAM is combinational
-- from user_port0_cmd_valid. IE. we won't know that LiteDRAM cannot
-- accept a command until we try to send one.
-- --
ad3 <= wb_in.adr(3); if state = IDLE then
accept_store <= user_port0_cmd_ready and storeq_wr_ready;

-- Wishbone port IN signals -- Corner case !!! The read acks pipeline takes two extra cycles
user_port0_cmd_valid <= wb_in.cyc and wb_in.stb when state = CMD else '0'; -- which means a store ack can collide with a previous load hit
user_port0_cmd_we <= wb_in.we when state = CMD else '0'; -- ack. Thus we stall stores if we have a load ack pending.
user_port0_wdata_valid <= '1' when state = MWRITE else '0'; if read_ack_0 = '1' or read_ack_1 = '1' then
user_port0_rdata_ready <= '1' when state = MREAD else '0'; accept_store <= '0';
user_port0_cmd_addr <= wb_in.adr(DRAM_ABITS+3 downto 4); end if;
user_port0_wdata_data <= wb_in.dat & wb_in.dat; else
user_port0_wdata_we <= wb_in.sel & "00000000" when ad3 = '1' else accept_store <= '0';
"00000000" & wb_in.sel; end if;

-- Generate stalls. For loads, we stall if we are going to take a load
-- miss or are in the middle of a refill. For stores, if we can't
-- accept it.
case state is
when IDLE =>
case req_op is
when OP_LOAD_MISS =>
wb_out.stall <= '1';
wb_out.stall <= not accept_store;
when others =>
wb_out.stall <= '0';
end case;
wb_out.stall <= '1';
end case;
-- Wishbone OUT signals -- Data out mux
wb_out.ack <= user_port0_wdata_ready when state = MWRITE else rdata := cache_out(read_way_1);
user_port0_rdata_valid when state = MREAD else '0'; wb_out.dat <= rdata(127 downto 64) when read_ad3_1 = '1' else rdata(63 downto 0);

wb_out.dat <= user_port0_rdata_data(127 downto 64) when ad3 = '1' else -- Early-complete stores on wishbone.
user_port0_rdata_data(63 downto 0); if req_op = OP_STORE_HIT or req_op = OP_STORE_MISS then
store_done := accept_store;
store_done := '0';
end if;

-- We don't do pipelining yet. -- Generate ACKs on read hits and store complete
wb_out.stall <= '0' when wb_in.cyc = '0' else not wb_out.ack; --
-- XXXX TODO: This can happen on store right behind loads !
-- This probably needs to be fixed by putting store acks in
-- the same pipeline as the read acks. TODO: Create a testbench
-- to exercise those corner cases as the core can't yet.
wb_out.ack <= read_ack_1 or store_done;
assert read_ack_0 = '0' or store_done = '0' report
"Read ack and store ack collision !"
severity failure;
end process;

-- DRAM user port State machine --
sm: process(system_clk) -- Cache request decode
request_decode: process(all)
variable valid : std_ulogic;
variable is_hit : std_ulogic;
variable hit_way : way_t;
begin begin
-- Extract line, row and tag from request
req_index <= get_index(wb_req.adr);
req_row <= get_row(wb_req.adr(REAL_ADDR_BITS-1 downto 0));
req_tag <= get_tag(wb_req.adr);

-- Calculate address of beginning of cache line, will be
-- used for cache miss processing if needed
req_laddr <= wb_req.adr(REAL_ADDR_BITS - 1 downto LINE_OFF_BITS) &
(LINE_OFF_BITS-1 downto 0 => '0');

-- Do we have a valid request in the WB latch ?
if state = IDLE then
valid := wb_req.cyc and wb_req.stb;
valid := '0';
end if;

-- Store signals
req_ad3 <= wb_req.adr(3);
req_wdata <= wb_req.dat & wb_req.dat;
req_we <= wb_req.sel & "00000000" when req_ad3 = '1' else
"00000000" & wb_req.sel;

-- Test if pending request is a hit on any way
hit_way := 0;
is_hit := '0';
for i in way_t loop
if valid = '1' and cache_valids(req_index)(i) = '1' then
if read_tag(i, cache_tags(req_index)) = req_tag then
hit_way := i;
is_hit := '1';
end if;
end if;
end loop;

-- Generate the req op. We only allow OP_LOAD_* when in the
-- IDLE state as our PLRU and ACK generation rely on this,
-- stores are allowed in IDLE state.
req_op <= OP_NONE;
if valid = '1' then
if wb_req.we = '1' then
if is_hit = '1' then
req_op <= OP_STORE_HIT;
req_op <= OP_STORE_MISS;
end if;
if is_hit = '1' then
req_op <= OP_LOAD_HIT;
req_op <= OP_LOAD_MISS;
end if;
end if;
end if;
req_hit_way <= hit_way;
end process;

-- Store queue
-- For now, queue up to STOREQ_DEPTH stores (8 by default)
store_queue: entity work.sync_fifo
generic map (
port map (
clk => system_clk,
reset => system_reset,
rd_ready => storeq_rd_ready,
rd_valid => storeq_rd_valid,
rd_data => storeq_rd_data,
wr_ready => storeq_wr_ready,
wr_valid => storeq_wr_valid,
wr_data => storeq_wr_data

storeq_control : process(all)
variable stq_data : wishbone_data_type;
variable stq_sel : std_ulogic_vector(DRAM_SBITS-1 downto 0);
storeq_wr_data <= wb_req.dat & req_we;

-- Only accept store if we can send a command
if req_op = OP_STORE_HIT or req_op = OP_STORE_MISS then
storeq_wr_valid <= user_port0_cmd_ready;
storeq_wr_valid <= '0';
end if;

stq_data := storeq_rd_data(storeq_rd_data'left downto DRAM_SBITS);
stq_sel := storeq_rd_data(DRAM_SBITS-1 downto 0);
user_port0_wdata_data <= stq_data & stq_data;
user_port0_wdata_we <= stq_sel;
user_port0_wdata_valid <= storeq_rd_valid;
storeq_rd_ready <= user_port0_wdata_ready;

if TRACE then
if rising_edge(system_clk) then if rising_edge(system_clk) then
if req_op = OP_STORE_HIT then
report "Store hit to:" &
to_hstring(wb_req.adr(DRAM_ABITS+3 downto 0)) &
" data:" & to_hstring(req_wdata) &
" we:" & to_hstring(req_we) &
" V:" & std_ulogic'image(accept_store);
report "Store miss to:" &
to_hstring(wb_req.adr(DRAM_ABITS+3 downto 0)) &
" data:" & to_hstring(req_wdata) &
" we:" & to_hstring(req_we) &
" V:" & std_ulogic'image(accept_store);
end if;
if storeq_wr_valid = '1' and storeq_wr_ready = '1' then
report "storeq push " & to_hstring(storeq_wr_data);
end if;
if storeq_rd_valid = '1' and storeq_rd_ready = '1' then
report "storeq pop " & to_hstring(storeq_rd_data);
end if;
end if;
end if;
end process;

-- LiteDRAM command mux
dram_commands: process(all)
if state = IDLE and (req_op = OP_STORE_HIT or req_op = OP_STORE_MISS) then
-- For stores, forward signals directly. Only send command if
-- the FIFO can accept a store
user_port0_cmd_addr <= wb_req.adr(DRAM_ABITS+3 downto 4);
user_port0_cmd_we <= '1';
user_port0_cmd_valid <= storeq_wr_ready;
-- For loads, we route via a latch controlled by the refill machine
user_port0_cmd_addr <= refill_cmd_addr;
user_port0_cmd_valid <= refill_cmd_valid;
user_port0_cmd_we <= '0';
end if;
user_port0_rdata_ready <= '1'; -- Always 1
end process;

-- LiteDRAM refill machine
-- This handles the cache line refills
refill_machine : process(system_clk)
variable tagset : cache_tags_set_t;
variable cmds_done : boolean;
variable replace_way : way_t;
variable wait_qdrain : boolean;
if rising_edge(system_clk) then
-- On reset, clear all valid bits to force misses
if system_reset = '1' then if system_reset = '1' then
state <= CMD; for i in index_t loop
cache_valids(i) <= (others => '0');
end loop;
state <= IDLE;
refill_cmd_valid <= '0';
else else
-- Main state machine
case state is case state is
when CMD => when IDLE =>
if (user_port0_cmd_ready and user_port0_cmd_valid) = '1' then assert refill_cmd_valid = '0' report "refill cmd valid in IDLE state !"
state <= MWRITE when wb_in.we = '1' else MREAD; severity failure;

-- If NO_LS_OVERLAP is set, disallow a load miss if the store
-- queue still has data in it.
wait_qdrain := false;
wait_qdrain := storeq_rd_valid = '1';
end if;

-- We need to read a cache line
if req_op = OP_LOAD_MISS and not wait_qdrain then
-- Grab way to replace
replace_way := to_integer(unsigned(plru_victim(req_index)));

-- Force misses on that way while refilling that line
cache_valids(req_index)(replace_way) <= '0';

-- Store new tag in selected way
for i in 0 to NUM_WAYS-1 loop
if i = replace_way then
tagset := cache_tags(req_index);
write_tag(i, tagset, req_tag);
cache_tags(req_index) <= tagset;
end if; end if;
when MWRITE => end loop;
if user_port0_wdata_ready = '1' then
state <= CMD; -- Keep track of our index and way for subsequent stores
refill_index <= req_index;
refill_way <= replace_way;
refill_row <= get_row(req_laddr);

-- Prep for first DRAM read
-- XXX TODO: We could start a cycle early here by using
-- combo logic to generate the first command in
-- "dram_commands". In fact, we could make refill_cmd_addr
-- only contain the "counter" bits and wire it with the
-- other bits from req_laddr.
refill_cmd_addr <= req_laddr(DRAM_ABITS+3 downto 4);
refill_cmd_valid <= '1';

if TRACE then
report "refill addr " & to_hstring(req_laddr);
end if; end if;
when MREAD =>
-- Track that we had one request sent
end if;

-- Commands are all sent if refill_cmd_valid is 0
cmds_done := refill_cmd_valid = '0';

-- If we are still sending requests, was one accepted ?
if user_port0_cmd_ready = '1' and not cmds_done then
-- That was the last word ? We are done sending. Clear
-- command valid and set cmds_done so we can handle an
-- eventual last ack on the same cycle.
if TRACE then
report "got refill cmd ack !";
end if;
if is_last_row_addr(refill_cmd_addr) then
refill_cmd_valid <= '0';
cmds_done := true;
if TRACE then
report "all refill cmds done !";
end if;
-- Calculate the next row address
refill_cmd_addr <= next_row_addr(refill_cmd_addr);
if TRACE then
report "refill addr " &
end if;
end if;
end if;

-- Incoming read data processing
if user_port0_rdata_valid = '1' then if user_port0_rdata_valid = '1' then
state <= CMD; if TRACE then
report "got refill data ack !";
end if;
-- Check for completion
if cmds_done and is_last_row(refill_row) then
if TRACE then
report "all refill data done !";
end if;
-- Cache line is now valid
cache_valids(refill_index)(refill_way) <= '1';
-- We are done
state <= IDLE;
end if;

-- Increment refill row counter
refill_row <= next_row(refill_row);
end if; end if;
end case; end case;
end if; end if;

@ -9,9 +9,9 @@
#define CONFIG_CPU_NOP "nop" #define CONFIG_CPU_NOP "nop"

#ifdef __SIM__ #ifdef __SIM__
#define MEMTEST_BUS_SIZE 16 #define MEMTEST_BUS_SIZE 512//16
#define MEMTEST_DATA_SIZE 16 #define MEMTEST_DATA_SIZE 1024//16
#define MEMTEST_ADDR_SIZE 16 #define MEMTEST_ADDR_SIZE 128//16
#endif #endif

@ -1,5 +1,5 @@
//--------------------------------------------------------------------------------
// Auto-generated by Migen (0d16e03) & LiteX (564d731a) on 2020-05-26 20:37:38 // Auto-generated by Migen (0d16e03) & LiteX (564d731a) on 2020-05-30 20:25:53
//-------------------------------------------------------------------------------- //--------------------------------------------------------------------------------
module litedram_core( module litedram_core(
input wire clk, input wire clk,

@ -1,5 +1,5 @@
//--------------------------------------------------------------------------------
// Auto-generated by Migen (0d16e03) & LiteX (564d731a) on 2020-05-26 20:37:40 // Auto-generated by Migen (0d16e03) & LiteX (564d731a) on 2020-05-30 20:25:55
//-------------------------------------------------------------------------------- //--------------------------------------------------------------------------------
module litedram_core( module litedram_core(
input wire clk, input wire clk,

@ -510,7 +510,7 @@ a64b5a7d14004a39
0000000000000000 0000000000000000
0000000000000000 0000000000000000
0000000000000000 0000000000000000
384296003c4c0001 384297003c4c0001
fbc1fff07c0802a6 fbc1fff07c0802a6
f8010010fbe1fff8 f8010010fbe1fff8
3be10020f821fe91 3be10020f821fe91
@ -519,11 +519,11 @@ f8c101a838800140
38c101987c651b78 38c101987c651b78
7fe3fb78f8e101b0 7fe3fb78f8e101b0
f92101c0f90101b8 f92101c0f90101b8
48000da5f94101c8 48000d65f94101c8
7c7e1b7860000000 7c7e1b7860000000
480008bd7fe3fb78 4800087d7fe3fb78
3821017060000000 3821017060000000
480013647fc3f378 480013247fc3f378
0100000000000000 0100000000000000
4e80002000000280 4e80002000000280
0000000000000000 0000000000000000
@ -531,67 +531,67 @@ f92101c0f90101b8
4e8000204c00012c 4e8000204c00012c
0000000000000000 0000000000000000
3c4c000100000000 3c4c000100000000
7c0802a63842955c 7c0802a63842965c
7d800026fbe1fff8 7d800026fbe1fff8
91810008f8010010 91810008f8010010
480007b1f821ff91 48000771f821ff91
3c62ffff60000000 3c62ffff60000000
4bffff3538637de8 4bffff3538637ca8
548400023880ffff 548400023880ffff
7c8026ea7c0004ac 7c8026ea7c0004ac
3fe0c0003c62ffff 3fe0c0003c62ffff
63ff000838637e08 63ff000838637cc8
3c62ffff4bffff11 3c62ffff4bffff11
38637e287bff0020 38637ce87bff0020
7c0004ac4bffff01 7c0004ac4bffff01
73e900017fe0feea 73e900017fe0feea
3c62ffff41820010 3c62ffff41820010
4bfffee538637e40 4bfffee538637d00
4d80000073e90002 4d80000073e90002
3c62ffff41820010 3c62ffff41820010
4bfffecd38637e48 4bfffecd38637d08
4e00000073e90004 4e00000073e90004
3c62ffff41820010 3c62ffff41820010
4bfffeb538637e50 4bfffeb538637d10
3be2804860000000 3bff7fa03fe2ffff
4bfffea57fe3fb78 4bfffea57fe3fb78
3c80c00041920028 3c80c00041920028
7884002060840010 7884002060840010
7c8026ea7c0004ac 7c8026ea7c0004ac
7884b2823c62ffff 7884b2823c62ffff
4bfffe7d38637e58 4bfffe7d38637d18
3c80c000418e004c 3c80c000418e004c
7884002060840018 7884002060840018
7c8026ea7c0004ac 7c8026ea7c0004ac
788465023c62ffff 788465023c62ffff
4bfffe5538637e78 4bfffe5538637d38
608400303c80c000 608400303c80c000
7c0004ac78840020 7c0004ac78840020
3c62ffff7c8026ea 3c62ffff7c8026ea
38637e987884b282 38637d587884b282
3d20c0004bfffe31 3d20c0004bfffe31
7929002061290020 7929002061290020
7d204eea7c0004ac 7d204eea7c0004ac
3c62ffff3c80000f 3c62ffff3c80000f
38637eb860844240 38637d7860844240
4bfffe057c892392 4bfffe057c892392
4bfffdfd7fe3fb78 4bfffdfd7fe3fb78
3ca2ffff418e0028 3ca2ffff418e0028
3c62ffff3c82ffff 3c62ffff3c82ffff
38847ee838a57ed8 38847da838a57d98
4bfffddd38637ef0 4bfffddd38637db0
60000000480004c1 6000000048000481
3c62ffff41920020 3c62ffff41920020
4bfffdc538637f20 4bfffdc538637de0
8181000838210070 8181000838210070
480011807d818120 480011407d818120
38637f383c62ffff 38637df83c62ffff
3c80f0004bfffda9 3c80f0004bfffda9
6084400038a0ffff 6084400038a0ffff
7884002054a50422 7884002054a50422
480008553c604000 480008153c604000
3c62ffff60000000 3c62ffff60000000
4bfffd7d38637f58 4bfffd7d38637e18
e801001038210070 e801001038210070
ebe1fff881810008 ebe1fff881810008
7d8181207c0803a6 7d8181207c0803a6
@ -605,138 +605,130 @@ ebe1fff881810008
4e8000207d20572a 4e8000207d20572a
0000000000000000 0000000000000000
3c4c000100000000 3c4c000100000000
7c0802a63842930c 7c0802a63842940c
614a08003d40c010 614a08003d40c010
794a002039200001 794a002039200001
f821ffa1f8010010 f821ffa1f8010010
7d20572a7c0004ac 7d20572a7c0004ac
3862802860000000 38637f803c62ffff
600000004bfffce1 600000004bfffce1
e801001038210060 e801001038210060
4e8000207c0803a6 4e8000207c0803a6
0100000000000000 0100000000000000
3c4c000100000080 3c4c000100000080
7c0802a6384292b4 7c0802a6384293b4
6129000c3d204000 38637e303c62ffff
3fc0aaaa48000ffd f821ff7148000fc1
f821ff713f804000 600000004bfffca1
63deaaaa3fa04000 3d40aaaa39000080
639c00043fe04000 3d2040007d0903a6
93df000063bd0008 91490000614aaaaa
93dd000093dc0000 4200fff839290004
4bfffce993c90000 4bfffce93f60aaaa
813f000060000000 3fa0aaaa60000000
7d29f278815c0000 637baaaa3f82ffff
7d2900347f8af000 3be000003bc00000
692900015529d97e 3b9c7e4063bdaaaa
7fff07b43be90001 3d3e10007b7b0020
7d3f07b4409e0008 792917647fc407b4
7f89f000813d0000 7f85e80080a90000
3bff0001419e000c 3bff0001419e001c
3d2040007fff07b4 7f83e3787f66db78
812900006129000c 4bfffc257fff07b4
2f8aaaaa6d2a5555 3bde000160000000
3bff0001419e000c 409effc82bbe0080
3fc055557fff07b4 3d40555539000080
3d2040003fa04000 3d2040007d0903a6
3f80400063de5555 91490000614a5555
63bd000461290008 4200fff839290004
93dd000093dc0000 600000004bfffc65
3d20400093c90000 3f82ffff3fa05555
93c900006129000c 3bc000003f605555
600000004bfffc4d 3b9c7e6063bd5555
7f89f000813c0000 3d3e1000637b5555
3bff0001419e000c 792917647fc407b4
813d00007fff07b4 7f85e80080a90000
2f8a55556d2a5555 3bff0001419e001c
3bff0001419e000c 7f83e3787f66db78
3d2040007fff07b4 4bfffba57fff07b4
8129000061290008 3bde000160000000
2f8a55556d2a5555 409effc82bbe0080
3bff0001419e000c 419e001c2fbf0000
3d2040007fff07b4 38a001003c62ffff
812900006129000c 38637e807fe4fb78
2f8a55556d2a5555 600000004bfffb79
3bff0001419e0028 3fc2ffff3c62ffff
3c62ffff7fff07b4 3bde7ec038637ea8
7fe4fb7838a00100 600000004bfffb61
4bfffb5538637f70 3d20400039400100
4800000c60000000 390000017d4903a6
409effe02fbf0000 3929000439480001
3ce0802039000004 9149fffc79480020
60e700037d0903a6 4bfffba94200fff0
392000013d404000 3940010060000000
7928f84278e70020 7d4903a639200000
7d2900d0792907e0 3d09100038c00001
7d293838394a0004 7908176439460001
912afffc7d294278 794600207d2407b4
4bfffb794200ffe4 7f8a284080a80000
3900000460000000 7fc3f378419e0014
7d0903a63ce08020 600000004bfffaf9
3d40400060e70003 392900014bffff98
392000013bc00000 3c62ffff4200ffcc
7928f84278e70020 4bfffadd38637ee0
7d2900d0792907e0 3920002060000000
7d2942787d293838 7d2903a639400000
7f884840810a0000 794800203d2a1000
3bde0001419e000c 394a000139290002
394a00047fde07b4 9109000079291764
2fbe00004200ffd4 4bfffb214200ffe8
3c62ffff419e001c 3f82ffff60000000
7fc4f37838a00004 3bc000003ba00000
4bfffa9538637f98 3d3d10003b9c7ef8
3d20400060000000 792917647fa607b4
6129000839400000 5529043e81290008
914900003ba00000 7d2507b47f893000
394000013d204000 3bde0001419e001c
914900006129000c 7f83e3787cc43378
394000023d204000 4bfffa657fde07b4
9149000061290010 3bbd000160000000
394000033d204000 409effc02bbd0020
9149000061290014 419e001c2fbe0000
600000004bfffabd 38a000203c62ffff
3940000039200004 38637f187fc4f378
3d2a10007d2903a6 600000004bfffa39
8129000879291764 386000007ffff214
7f8950005529043e 409e00b02f9f0000
3bbd0001419e000c 38637f403c62ffff
394a00017fbd07b4 600000004bfffa19
2fbd00004200ffdc 394001007c9602a6
3c62ffff419e001c 7d4903a678840020
7fa4eb7838a00004 3d49100039200000
4bfff9f538637fc0 794a176479280020
7ffefa1460000000 910a000039290001
7fffea143bc00000 7ff602a64200ffec
409e00a42f9f0000 3fe0000c7c9f2050
38637fe83c62ffff 7fff239663ff8000
600000004bfff9d1 600000004bfffa45
3f8040007f5602a6 7d3602a67bff0020
639c00043f604000 7929002039000100
3fa0400039200001 3d4040007d0903a6
3fc0400093db0000 394a0004810a0000
63bd0008913c0000 7cb602a64200fff8
63de000c39200002 3ca0000c7d254850
39200003913d0000 3c62ffff60a58000
7ff602a6913e0000 7fe4fb787ca54b96
600000004bfff9fd 78a5032038637f50
815b00007d3602a6 600000004bfff981
815d0000815c0000 3821009038600001
7cb602a6815e0000 0000000048000cd8
7ca5485038803200 0000058001000000
7ca42b967d3fd050 384290583c4c0001
3c62ffff7c844b96 3c62ffff7c0802a6
788404a078a504a0 48000c6138637fa8
3f60c010f821ff71 3f60c010f821ff71
637b10003be00000 637b10003be00000
4bfff8f57b7b0020 4bfff9357b7b0020
7c0004ac60000000 7c0004ac60000000
3f40c0107fe0df2a 3f40c0107fe0df2a
7b5a0020635a1008 7b5a0020635a1008
@ -756,22 +748,22 @@ f821ff713f804000
7d20ef2a7c0004ac 7d20ef2a7c0004ac
7c0004ac39200002 7c0004ac39200002
3860000f7d20f72a 3860000f7d20f72a
7c0004ac4bfffb09 7c0004ac4bfffb49
392000037fe0ef2a 392000037fe0ef2a
7d20f72a7c0004ac 7d20f72a7c0004ac
4bfffaed3860000f 4bfffb2d3860000f
7c0004ac39200006 7c0004ac39200006
3b8000017d20ef2a 3b8000017d20ef2a
7f80f72a7c0004ac 7f80f72a7c0004ac
4bfffacd3860000f 4bfffb0d3860000f
7c0004ac39200920 7c0004ac39200920
7c0004ac7d20ef2a 7c0004ac7d20ef2a
3860000f7fe0f72a 3860000f7fe0f72a
392004004bfffab1 392004004bfffaf1
7d20ef2a7c0004ac 7d20ef2a7c0004ac
7fe0f72a7c0004ac 7fe0f72a7c0004ac
4bfffa9538600003 4bfffad538600003
4bfffb294bfffad5 4bfffb694bfffb15
4082001c2c230000 4082001c2c230000
7f80df2a7c0004ac 7f80df2a7c0004ac
7f80d72a7c0004ac 7f80d72a7c0004ac
@ -780,27 +772,27 @@ f821ff713f804000
4bffffec38600001 4bffffec38600001
0100000000000000 0100000000000000
3c4c000100000680 3c4c000100000680
3d20c00038428d94 3d20c00038428ed4
6129200060000000 6129200060000000
f92280b879290020 f922801079290020
612900203d20c000 612900203d20c000
7c0004ac79290020 7c0004ac79290020
3d40001c7d204eea 3d40001c7d204eea
7d295392614a2000 7d295392614a2000
394a0018e94280b8 394a0018e9428010
7c0004ac3929ffff 7c0004ac3929ffff
4e8000207d2057ea 4e8000207d2057ea
0000000000000000 0000000000000000
3c4c000100000000 3c4c000100000000
6000000038428d34 6000000038428e74
39290010e92280b8 39290010e9228010
7d204eea7c0004ac 7d204eea7c0004ac
4082ffe871290008 4082ffe871290008
e94280b85469063e e94280105469063e
7d2057ea7c0004ac 7d2057ea7c0004ac
000000004e800020 000000004e800020
0000000000000000 0000000000000000
38428cf03c4c0001 38428e303c4c0001
fbc1fff07c0802a6 fbc1fff07c0802a6
3bc3fffffbe1fff8 3bc3fffffbe1fff8
f821ffd1f8010010 f821ffd1f8010010
@ -874,7 +866,7 @@ f924000039290002
7c6307b43863ffe0 7c6307b43863ffe0
000000004e800020 000000004e800020
0000000000000000 0000000000000000
38428aa03c4c0001 38428be03c4c0001
3d2037367c0802a6 3d2037367c0802a6
612935347d908026 612935347d908026
65293332792907c6 65293332792907c6
@ -908,7 +900,7 @@ fbfd00007fe9fa14
4bfffff07d29f392 4bfffff07d29f392
0300000000000000 0300000000000000
3c4c000100000580 3c4c000100000580
7c0802a638428994 7c0802a638428ad4
f821ffb1480006e9 f821ffb1480006e9
7c7f1b78eb630000 7c7f1b78eb630000
7cbd2b787c9c2378 7cbd2b787c9c2378
@ -924,7 +916,7 @@ f821ffb1480006e9
4bffffb8f93f0000 4bffffb8f93f0000
0100000000000000 0100000000000000
3c4c000100000580 3c4c000100000580
7c0802a638428914 7c0802a638428a54
f821ffa148000661 f821ffa148000661
7c9b23787c7d1b78 7c9b23787c7d1b78
388000007ca32b78 388000007ca32b78
@ -955,16 +947,16 @@ e95d00009b270000
f95d0000394a0001 f95d0000394a0001
000000004bffffa8 000000004bffffa8
0000078001000000 0000078001000000
384288183c4c0001 384289583c4c0001
480005397c0802a6 480005397c0802a6
7c741b79f821fed1 7c741b79f821fed1
38600000f8610060 38600000f8610060
2fa4000041820068 2fa4000041820068
39210040419e0060 39210040419e0060
3ac4ffff60000000 3ac4ffff3e42ffff
f92100703b410020 f92100703b410020
3ae0000060000000 3ae0000060000000
3a428068392280b0 3a527fc039228008
f92100783ba10060 f92100783ba10060
ebc1006089250000 ebc1006089250000
419e00102fa90000 419e00102fa90000
@ -1196,16 +1188,35 @@ e8010010ebc1fff0
20676e69746f6f42 20676e69746f6f42
415244206d6f7266 415244206d6f7266
0000000a2e2e2e4d 0000000a2e2e2e4d
20747365746d654d 20747365746d654d
6c69616620737562 6c69616620737562
252f6425203a6465 252f6425203a6465
73726f7272652064 73726f7272652064
000000000000000a 000000000000000a
20747365746d654d 20747365746d656d
6961662061746164 0a2e2e2e61746164
2f6425203a64656c 0000000000000000
726f727265206425 783020617461645b
0000000000000a73 7830203a5d783025
20747365746d654d 20747365746d654d
6961662072646461 6961662072646461
2f6425203a64656c 2f6425203a64656c

@ -1,5 +1,5 @@
//-------------------------------------------------------------------------------- //--------------------------------------------------------------------------------
// Auto-generated by Migen (0d16e03) & LiteX (564d731a) on 2020-05-26 20:37:42 // Auto-generated by Migen (0d16e03) & LiteX (564d731a) on 2020-05-30 20:25:57
//-------------------------------------------------------------------------------- //--------------------------------------------------------------------------------
module litedram_core( module litedram_core(
input wire clk, input wire clk,

@ -48,6 +48,7 @@ filesets:
- soc.vhdl - soc.vhdl
- xics.vhdl - xics.vhdl
- syscon.vhdl - syscon.vhdl
- sync_fifo.vhdl
file_type : vhdlSource-2008 file_type : vhdlSource-2008

fpga: fpga:

@ -0,0 +1,163 @@
-- Synchronous FIFO with a protocol similar to AXI
-- The outputs are generated combinationally from the inputs
-- in order to allow for back-to-back transfers with the type
-- of flow control used by busses like AXI, pipelined WB or
-- LiteDRAM native port when the FIFO is full.
-- That means that care needs to be taken by the user not to
-- generate the inputs combinationally from the outputs otherwise
-- it would create a logic loop.
-- If breaking that loop is required, a stash buffer could be
-- added to break the flow control "loop" between the read and
-- the write port.
library ieee;
use ieee.std_logic_1164.all;

library work;
use work.utils.all;

-- Synchronous FIFO entity.
--
-- Both ports use a ready/valid handshake: a transfer happens on a
-- rising clock edge when the port's ready and valid are both '1'.
entity sync_fifo is
    generic (
        -- Fifo depth in entries
        DEPTH     : natural := 64;

        -- Fifo width in bits
        WIDTH     : natural := 32;

        -- When INIT_ZERO is set, the memory is pre-initialized to 0's
        INIT_ZERO : boolean := false
        );
    port (
        -- Control lines:
        clk      : in std_ulogic;
        reset    : in std_ulogic;

        -- Write port
        -- wr_ready is driven combinationally by the FIFO, wr_valid and
        -- wr_data are driven by the producer.
        wr_ready : out std_ulogic;
        wr_valid : in std_ulogic;
        wr_data  : in std_ulogic_vector(WIDTH - 1 downto 0);

        -- Read port
        -- rd_valid and rd_data are driven combinationally by the FIFO,
        -- rd_ready is driven by the consumer.
        rd_ready : in std_ulogic;
        rd_valid : out std_ulogic;
        rd_data  : out std_ulogic_vector(WIDTH - 1 downto 0)
        );
end entity sync_fifo;

-- Implementation of the synchronous FIFO.
--
-- Storage is a DEPTH-entry array addressed by a read index and a write
-- index. Since equal indices mean either "empty" or "full", the kind of
-- the last index-changing operation (push or pop) is remembered in
-- op_prev to tell the two cases apart.
--
-- wr_ready depends combinationally on rd_ready and rd_valid depends
-- combinationally on wr_valid, so that a full FIFO can accept a word
-- in the same cycle one is read, and an empty FIFO can pass a word
-- straight through from the write port to the read port.
architecture behaviour of sync_fifo is

    subtype data_t is std_ulogic_vector(WIDTH - 1 downto 0);
    type memory_t is array(0 to DEPTH - 1) of data_t;

    -- Initial content of the storage array: all zeroes when INIT_ZERO
    -- is set, otherwise the type's default initial value.
    function init_mem return memory_t is
        variable m : memory_t;
    begin
        if INIT_ZERO then
            for i in 0 to DEPTH - 1 loop
                m(i) := (others => '0');
            end loop;
        end if;
        return m;
    end function;

    signal memory : memory_t := init_mem;

    subtype index_t is integer range 0 to DEPTH - 1;
    signal rd_idx  : index_t;
    signal rd_next : index_t;
    signal wr_idx  : index_t;
    signal wr_next : index_t;

    -- Return the index following idx, wrapping from DEPTH - 1 to 0.
    function next_index(idx : index_t) return index_t is
        variable r : index_t;
    begin
        if ispow2(DEPTH) then
            r := (idx + 1) mod DEPTH;
        elsif idx = DEPTH - 1 then
            -- Wrap before incrementing: idx + 1 would be DEPTH here,
            -- which is outside the range of index_t.
            r := 0;
        else
            r := idx + 1;
        end if;
        return r;
    end function;

    type op_t is (OP_POP, OP_PUSH);
    signal op_prev : op_t := OP_POP;
    signal op_next : op_t;

    signal full, empty : std_ulogic;
    signal push, pop   : std_ulogic;

begin

    -- Current state at last clock edge
    empty <= '1' when rd_idx = wr_idx and op_prev = OP_POP else '0';
    full  <= '1' when rd_idx = wr_idx and op_prev = OP_PUSH else '0';

    -- We can accept new data if we aren't full or we are but
    -- the read port is going to accept data this cycle
    wr_ready <= rd_ready or not full;

    -- We can provide data if we aren't empty or we are but
    -- the write port is going to provide data this cycle
    rd_valid <= wr_valid or not empty;

    -- Internal control signals
    push <= wr_ready and wr_valid;
    pop  <= rd_ready and rd_valid;

    -- Next state
    rd_next <= next_index(rd_idx) when pop = '1' else rd_idx;
    wr_next <= next_index(wr_idx) when push = '1' else wr_idx;
    -- A simultaneous push and pop (or no operation at all) leaves the
    -- occupancy unchanged, so the remembered operation is kept.
    with push & pop select op_next <=
        OP_PUSH when "10",
        OP_POP  when "01",
        op_prev when others;

    -- Read port output
    -- When empty, the write data is forwarded directly to the read port.
    rd_data <= memory(rd_idx) when empty = '0' else wr_data;

    -- Read counter
    reader: process(clk)
    begin
        if rising_edge(clk) then
            if reset = '1' then
                rd_idx <= 0;
            else
                rd_idx <= rd_next;
            end if;
        end if;
    end process;

    -- Write counter and memory write
    -- The memory content itself is not cleared by reset.
    producer: process(clk)
    begin
        if rising_edge(clk) then
            if reset = '1' then
                wr_idx <= 0;
            else
                wr_idx <= wr_next;

                if push = '1' then
                    memory(wr_idx) <= wr_data;
                end if;
            end if;
        end if;
    end process;

    -- Previous op latch used for generating empty/full
    op: process(clk)
    begin
        if rising_edge(clk) then
            if reset = '1' then
                op_prev <= OP_POP;
            else
                op_prev <= op_next;
            end if;
        end if;
    end process;

end architecture behaviour;