@ -19,9 +19,12 @@ entity icache is
clk : in std_ulogic;
rst : in std_ulogic;
i_in : in Fetch2ToIcacheType;
i_in : in Fetch1ToIcacheType;
i_out : out IcacheToFetch2Type;
stall_out : out std_ulogic;
flush_in : in std_ulogic;
wishbone_out : out wishbone_master_out;
wishbone_in : in wishbone_slave_out
);
@ -59,113 +62,194 @@ architecture rtl of icache is
subtype cacheline_tag_type is std_logic_vector(TAG_BITS-1 downto 0);
type cacheline_tag_array is array(0 to NUM_LINES-1) of cacheline_tag_type;
signal cachelines : cacheline_array := (others => (others => '0'));
signal tags : cacheline_tag_array := (others => (others => '0'));
signal tags_valid : std_ulogic_vector(NUM_LINES-1 downto 0) := (others => '0');
-- Storage. Hopefully "cachelines" is a BRAM, the rest is LUTs
signal cachelines : cacheline_array;
signal tags : cacheline_tag_array;
signal tags_valid : std_ulogic_vector(NUM_LINES-1 downto 0);
attribute ram_style : string;
attribute ram_style of cachelines : signal is "block";
attribute ram_decomp : string;
attribute ram_decomp of cachelines : signal is "power";
-- Cache reload state machine
type state_type is (IDLE, WAIT_ACK);
type reg_internal_type is record
state : state_type;
w : wishbone_master_out;
store_index : integer range 0 to (NUM_LINES-1);
store_word : integer range 0 to (LINE_SIZE-1);
-- Cache hit state (1 cycle BRAM access)
hit_line : cacheline_type;
hit_nia : std_ulogic_vector(63 downto 0);
hit_smark : std_ulogic;
hit_valid : std_ulogic;
-- Cache miss state (reload state machine)
state : state_type;
wb : wishbone_master_out;
store_index : integer range 0 to (NUM_LINES-1);
store_mask : std_ulogic_vector(LINE_SIZE_DW-1 downto 0);
end record;
signal r : reg_internal_type;
signal read_index : integer range 0 to NUM_LINES-1;
signal read_tag : std_ulogic_vector(63-OFFSET_BITS-INDEX_BITS downto 0);
signal read_miss : boolean;
-- Async signals decoding incoming requests
signal req_index : integer range 0 to NUM_LINES-1;
signal req_tag : std_ulogic_vector(TAG_BITS-1 downto 0);
signal req_word : integer range 0 to LINE_SIZE_DW*2-1;
signal req_is_hit : std_ulogic;
-- Return the cache line index (tag index) for an address
function get_index(addr: std_ulogic_vector(63 downto 0)) return integer is
begin
return to_integer(unsigned(addr((OFFSET_BITS+INDEX_BITS-1) downto OFFSET_BITS)));
end;
function get_word(addr: std_ulogic_vector(63 downto 0); data: cacheline_type) return std_ulogic_vector is
variable word : integer;
-- Return the word index in a cache line for an address
function get_word(addr: std_ulogic_vector(63 downto 0)) return integer is
begin
return to_integer(unsigned(addr(OFFSET_BITS-1 downto 2)));
end;
-- Read a word in a cache line for an address
function read_word(word: integer; data: cacheline_type) return std_ulogic_vector is
begin
word := to_integer(unsigned(addr(OFFSET_BITS-1 downto 2)));
return data((word+1)*32-1 downto word*32);
return data((word+1)*32-1 downto word*32);
end;
-- Calculate the tag value from the address
function get_tag(addr: std_ulogic_vector(63 downto 0)) return std_ulogic_vector is
begin
return addr(63 downto OFFSET_BITS+INDEX_BITS);
end;
begin
assert ispow2(LINE_SIZE) report "LINE_SIZE not power of 2" severity FAILURE;
assert ispow2(NUM_LINES) report "NUM_LINES not power of 2" severity FAILURE;
icache_read : process(all)
icache_comb : process(all)
begin
read_index <= get_index(i_in.addr);
read_tag <= get_tag(i_in.addr);
read_miss <= false;
i_out.ack <= '0';
i_out.insn <= get_word(i_in.addr, cachelines(read_index));
if i_in.req = '1' then
if (tags_valid(read_index) = '1') and (tags(read_index) = read_tag) then
-- report hit asynchronously
i_out.ack <= '1';
else
read_miss <= true;
end if;
end if;
-- Calculate next index and tag index
req_index <= get_index(i_in.nia);
req_tag <= get_tag(i_in.nia);
req_word <= get_word(i_in.nia);
-- Test if pending request is a hit
if tags(req_index) = req_tag then
req_is_hit <= tags_valid(req_index);
else
req_is_hit <= '0';
end if;
-- Output instruction from current cache line
--
-- Note: This is a mild violation of our design principle of having pipeline
-- stages output from a clean latch. In this case we output the result
-- of a mux. The alternative would be output an entire cache line
-- which I prefer not to do just yet.
--
i_out.valid <= r.hit_valid;
i_out.insn <= read_word(get_word(r.hit_nia), r.hit_line);
i_out.nia <= r.hit_nia;
i_out.stop_mark <= r.hit_smark;
-- This needs to match the latching of a new request in icache_hit
stall_out <= not req_is_hit;
-- Wishbone requests output (from the cache miss reload machine)
wishbone_out <= r.wb;
end process;
wishbone_out <= r.w;
icache_hit : process(clk)
begin
if rising_edge(clk) then
-- Assume we have nothing valid first
r.hit_valid <= '0';
-- Are we free to latch a new request ?
--
-- Note: this test needs to match the equation for generating stall_out
--
if i_in.req = '1' and req_is_hit = '1' and flush_in = '0' then
-- Read the cache line (BRAM read port) and remember the NIA
r.hit_line <= cachelines(req_index);
r.hit_nia <= i_in.nia;
r.hit_smark <= i_in.stop_mark;
r.hit_valid <= '1';
report "cache hit nia:" & to_hstring(i_in.nia) &
" SM:" & std_ulogic'image(i_in.stop_mark) &
" idx:" & integer'image(req_index) &
" tag:" & to_hstring(req_tag);
end if;
icache_write : process(clk)
-- Flush requested ? discard...
if flush_in then
r.hit_valid <= '0';
end if;
end if;
end process;
icache_miss : process(clk)
variable store_dword : std_ulogic_vector(OFFSET_BITS-4 downto 0);
begin
if rising_edge(clk) then
if rst = '1' then
tags_valid <= (others => '0');
r.store_mask <= (others => '0');
r.state <= IDLE;
r.w.cyc <= '0';
r.w.stb <= '0';
end if;
r.wb.cyc <= '0';
r.wb.stb <= '0';
r.w.dat <= (others => '0');
r.w.sel <= "11111111";
r.w.we <= '0';
-- We only ever do reads on wishbone
r.wb.dat <= (others => '0');
r.wb.sel <= "11111111";
r.wb.we <= '0';
end if;
-- State machine
case r.state is
when IDLE =>
if read_miss = true then
-- We need to read a cache line
if i_in.req = '1' and req_is_hit = '0' then
report "cache miss nia:" & to_hstring(i_in.nia) &
" SM:" & std_ulogic'image(i_in.stop_mark) &
" idx:" & integer'image(req_index) &
" tag:" & to_hstring(req_tag);
r.state <= WAIT_ACK;
r.store_word <= 0;
r.store_index <= read_index;
r.store_mask <= (0 => '1', others => '0');
r.store_index <= req_index;
tags(read_index) <= read_tag;
tags_valid(read_index) <= '0';
-- Force misses while reloading that line
tags_valid(req_index) <= '0';
tags(req_index) <= req_tag;
r.w.adr <= i_in.addr(63 downto OFFSET_BITS) & (OFFSET_BITS-1 downto 0 => '0');
r.w.cyc <= '1';
r.w.stb <= '1';
-- Prep for first dword read
r.wb.adr <= i_in.nia(63 downto OFFSET_BITS) & (OFFSET_BITS-1 downto 0 => '0');
r.wb.cyc <= '1';
r.wb.stb <= '1';
end if;
when WAIT_ACK =>
if wishbone_in.ack = '1' then
cachelines(r.store_index)((r.store_word+1)*64-1 downto ((r.store_word)*64)) <= wishbone_in.dat;
r.store_word <= r.store_word + 1;
-- Store the current dword in both the cache
for i in 0 to LINE_SIZE_DW-1 loop
if r.store_mask(i) = '1' then
cachelines(r.store_index)(63 + i*64 downto i*64) <= wishbone_in.dat;
end if;
end loop;
if r.store_word = (LINE_SIZE_DW-1) then
-- That was the last word ? We are done
if r.store_mask(LINE_SIZE_DW-1) = '1' then
r.state <= IDLE;
tags_valid(r.store_index) <= '1';
r.w.cyc <= '0';
r.w.stb <= '0';
r.wb.cyc <= '0';
r.wb.stb <= '0';
else
r.w.adr(OFFSET_BITS-1 downto 3) <= std_ulogic_vector(to_unsigned(r.store_word+1, OFFSET_BITS-3));
store_dword := r.wb.adr(OFFSET_BITS-1 downto 3);
store_dword := std_ulogic_vector(unsigned(store_dword) + 1);
r.wb.adr(OFFSET_BITS-1 downto 3) <= store_dword;
end if;
-- Advance to next word
r.store_mask <= r.store_mask(LINE_SIZE_DW-2 downto 0) & '0';
end if;
end case;
end if;