@ -35,7 +35,13 @@ entity icache is
-- Number of lines in a set
NUM_LINES : positive := 32;
-- Number of ways
NUM_WAYS : positive := 4
NUM_WAYS : positive := 4;
-- L1 ITLB number of entries (direct mapped)
TLB_SIZE : positive := 64;
-- L1 ITLB log_2(page_size)
TLB_LG_PGSZ : positive := 12;
-- Number of real address bits that we store
REAL_ADDR_BITS : positive := 56
);
port (
clk : in std_ulogic;
@ -44,6 +50,8 @@ entity icache is
i_in : in Fetch1ToIcacheType;
i_out : out IcacheToFetch2Type;
m_in : in MmuToIcacheType;
stall_out : out std_ulogic;
flush_in : in std_ulogic;
@ -78,10 +86,12 @@ architecture rtl of icache is
constant LINE_OFF_BITS : natural := log2(LINE_SIZE);
-- ROW_OFF_BITS is the number of bits for the offset in a row
constant ROW_OFF_BITS : natural := log2(ROW_SIZE);
-- INDEX_BITS is the number if bits to select a cache line
-- INDEX_BITS is the number of bits to select a cache line
constant INDEX_BITS : natural := log2(NUM_LINES);
-- SET_SIZE_BITS is the log base 2 of the set size
constant SET_SIZE_BITS : natural := LINE_OFF_BITS + INDEX_BITS;
-- TAG_BITS is the number of bits of the tag part of the address
constant TAG_BITS : natural := 64 - LINE_OFF_BITS - INDEX_BITS;
constant TAG_BITS : natural := REAL_ADDR_BITS - SET_SIZE_BITS;
-- WAY_BITS is the number of bits to select a way
constant WAY_BITS : natural := log2(NUM_WAYS);
@ -126,6 +136,27 @@ architecture rtl of icache is
attribute ram_style : string;
attribute ram_style of cache_tags : signal is "distributed";
-- L1 ITLB.
constant TLB_BITS : natural := log2(TLB_SIZE);
constant TLB_EA_TAG_BITS : natural := 64 - (TLB_LG_PGSZ + TLB_BITS);
constant TLB_PTE_BITS : natural := 64;
subtype tlb_index_t is integer range 0 to TLB_SIZE - 1;
type tlb_valids_t is array(tlb_index_t) of std_ulogic;
subtype tlb_tag_t is std_ulogic_vector(TLB_EA_TAG_BITS - 1 downto 0);
type tlb_tags_t is array(tlb_index_t) of tlb_tag_t;
subtype tlb_pte_t is std_ulogic_vector(TLB_PTE_BITS - 1 downto 0);
type tlb_ptes_t is array(tlb_index_t) of tlb_pte_t;
signal itlb_valids : tlb_valids_t;
signal itlb_tags : tlb_tags_t;
signal itlb_ptes : tlb_ptes_t;
attribute ram_style of itlb_tags : signal is "distributed";
attribute ram_style of itlb_ptes : signal is "distributed";
-- Privilege bit from PTE EAA field
signal eaa_priv : std_ulogic;
-- Cache reload state machine
type state_t is (IDLE, WAIT_ACK);
@ -142,6 +173,9 @@ architecture rtl of icache is
store_way : way_t;
store_index : index_t;
store_row : row_t;
-- TLB miss state
fetch_failed : std_ulogic;
end record;
signal r : reg_internal_t;
@ -155,6 +189,12 @@ architecture rtl of icache is
signal req_is_miss : std_ulogic;
signal req_laddr : std_ulogic_vector(63 downto 0);
signal tlb_req_index : tlb_index_t;
signal real_addr : std_ulogic_vector(REAL_ADDR_BITS - 1 downto 0);
signal ra_valid : std_ulogic;
signal priv_fault : std_ulogic;
signal access_ok : std_ulogic;
-- Cache RAM interface
type cache_ram_out_t is array(way_t) of cache_row_t;
signal cache_out : cache_ram_out_t;
@ -167,13 +207,13 @@ architecture rtl of icache is
-- Return the cache line index (tag index) for an address
function get_index(addr: std_ulogic_vector(63 downto 0)) return index_t is
begin
return to_integer(unsigned(addr(63-TAG_BITS downto LINE_OFF_BITS)));
return to_integer(unsigned(addr(SET_SIZE_BITS - 1 downto LINE_OFF_BITS)));
end;
-- Return the cache row index (data memory) for an address
function get_row(addr: std_ulogic_vector(63 downto 0)) return row_t is
begin
return to_integer(unsigned(addr(63-TAG_BITS downto ROW_OFF_BITS)));
return to_integer(unsigned(addr(SET_SIZE_BITS - 1 downto ROW_OFF_BITS)));
end;
-- Returns whether this is the last row of a line
@ -231,9 +271,9 @@ architecture rtl of icache is
end;
-- Get the tag value from the address
function get_tag(addr: std_ulogic_vector(63 downto 0)) return cache_tag_t is
function get_tag(addr: std_ulogic_vector(REAL_ADDR_BITS - 1 downto 0)) return cache_tag_t is
begin
return addr(63 downto 64-TAG_BITS);
return addr(REAL_ADDR_BITS - 1 downto SET_SIZE_BITS);
end;
-- Read a tag from a tag memory row
@ -249,6 +289,15 @@ architecture rtl of icache is
tagset((way+1) * TAG_BITS - 1 downto way * TAG_BITS) := tag;
end;
-- Simple hash for direct-mapped TLB index
function hash_ea(addr: std_ulogic_vector(63 downto 0)) return tlb_index_t is
variable hash : std_ulogic_vector(TLB_BITS - 1 downto 0);
begin
hash := addr(TLB_LG_PGSZ + TLB_BITS - 1 downto TLB_LG_PGSZ)
xor addr(TLB_LG_PGSZ + 2 * TLB_BITS - 1 downto TLB_LG_PGSZ + TLB_BITS)
xor addr(TLB_LG_PGSZ + 3 * TLB_BITS - 1 downto TLB_LG_PGSZ + 2 * TLB_BITS);
return to_integer(unsigned(hash));
end;
begin
assert LINE_SIZE mod ROW_SIZE = 0;
@ -260,9 +309,9 @@ begin
report "geometry bits don't add up" severity FAILURE;
assert (LINE_OFF_BITS = ROW_OFF_BITS + ROW_LINEBITS)
report "geometry bits don't add up" severity FAILURE;
assert (64 = TAG_BITS + INDEX_BITS + LINE_OFF_BITS)
assert (REAL_ADDR_BITS = TAG_BITS + INDEX_BITS + LINE_OFF_BITS)
report "geometry bits don't add up" severity FAILURE;
assert (64 = TAG_BITS + ROW_BITS + ROW_OFF_BITS)
assert (REAL_ADDR_BITS = TAG_BITS + ROW_BITS + ROW_OFF_BITS)
report "geometry bits don't add up" severity FAILURE;
sim_debug: if SIM generate
@ -356,6 +405,69 @@ begin
end generate;
end generate;
-- TLB hit detection and real address generation
itlb_lookup : process(all)
variable pte : tlb_pte_t;
variable ttag : tlb_tag_t;
begin
tlb_req_index <= hash_ea(i_in.nia);
pte := itlb_ptes(tlb_req_index);
ttag := itlb_tags(tlb_req_index);
if i_in.virt_mode = '1' then
real_addr <= pte(REAL_ADDR_BITS - 1 downto TLB_LG_PGSZ) &
i_in.nia(TLB_LG_PGSZ - 1 downto 0);
if ttag = i_in.nia(63 downto TLB_LG_PGSZ + TLB_BITS) then
ra_valid <= itlb_valids(tlb_req_index);
else
ra_valid <= '0';
end if;
eaa_priv <= pte(3);
else
real_addr <= i_in.nia(REAL_ADDR_BITS - 1 downto 0);
ra_valid <= '1';
eaa_priv <= '1';
end if;
-- no IAMR, so no KUEP support for now
priv_fault <= eaa_priv and not i_in.priv_mode;
access_ok <= ra_valid and not priv_fault;
end process;
-- iTLB update
itlb_update: process(clk)
variable tlbie : std_ulogic;
variable tlbia : std_ulogic;
variable tlbwe : std_ulogic;
variable wr_index : tlb_index_t;
begin
if rising_edge(clk) then
tlbie := '0';
tlbia := '0';
tlbwe := m_in.tlbld;
if m_in.tlbie = '1' then
if m_in.addr(11 downto 10) /= "00" then
tlbia := '1';
else
tlbie := '1';
end if;
end if;
wr_index := hash_ea(m_in.addr);
if rst = '1' or tlbia = '1' then
-- clear all valid bits
for i in tlb_index_t loop
itlb_valids(i) <= '0';
end loop;
elsif tlbie = '1' then
-- clear entry regardless of hit or miss
itlb_valids(wr_index) <= '0';
elsif tlbwe = '1' then
itlb_tags(wr_index) <= m_in.addr(63 downto TLB_LG_PGSZ + TLB_BITS);
itlb_ptes(wr_index) <= m_in.pte;
itlb_valids(wr_index) <= '1';
end if;
end if;
end process;
-- Cache hit detection, output to fetch2 and other misc logic
icache_comb : process(all)
variable is_hit : std_ulogic;
@ -364,12 +476,13 @@ begin
-- Extract line, row and tag from request
req_index <= get_index(i_in.nia);
req_row <= get_row(i_in.nia);
req_tag <= get_tag(i_in.nia);
req_tag <= get_tag(real_addr);
-- Calculate address of beginning of cache line, will be
-- used for cache miss processing if needed
--
req_laddr <= i_in.nia(63 downto LINE_OFF_BITS) &
req_laddr <= (63 downto REAL_ADDR_BITS => '0') &
real_addr(REAL_ADDR_BITS - 1 downto LINE_OFF_BITS) &
(LINE_OFF_BITS-1 downto 0 => '0');
-- Test if pending request is a hit on any way
@ -385,8 +498,13 @@ begin
end loop;
-- Generate the "hit" and "miss" signals for the synchronous blocks
req_is_hit <= i_in.req and is_hit and not flush_in;
req_is_miss <= i_in.req and not is_hit and not flush_in;
if i_in.req = '1' and access_ok = '1' and flush_in = '0' then
req_is_hit <= is_hit;
req_is_miss <= not is_hit;
else
req_is_hit <= '0';
req_is_miss <= '0';
end if;
req_hit_way <= hit_way;
-- The way to replace on a miss
@ -404,9 +522,10 @@ begin
i_out.valid <= r.hit_valid;
i_out.nia <= r.hit_nia;
i_out.stop_mark <= r.hit_smark;
i_out.fetch_failed <= r.fetch_failed;
-- Stall fetch1 if we have a miss
stall_out <= not is_hit;
-- Stall fetch1 if we have a miss on cache or TLB or a protection fault
stall_out <= not (is_hit and access_ok);
-- Wishbone requests output (from the cache miss reload machine)
wishbone_out <= r.wb;
@ -419,22 +538,21 @@ begin
-- On a hit, latch the request for the next cycle, when the BRAM data
-- will be available on the cache_out output of the corresponding way
--
r.hit_valid <= req_is_hit;
-- Send stop marks and NIA down regardless of validity
r.hit_smark <= i_in.stop_mark;
r.hit_nia <= i_in.nia;
if req_is_hit = '1' then
r.hit_way <= req_hit_way;
r.hit_nia <= i_in.nia;
r.hit_smark <= i_in.stop_mark;
r.hit_valid <= '1';
report "cache hit nia:" & to_hstring(i_in.nia) &
" IR:" & std_ulogic'image(i_in.virt_mode) &
" SM:" & std_ulogic'image(i_in.stop_mark) &
" idx:" & integer'image(req_index) &
" tag:" & to_hstring(req_tag) &
" way: " & integer'image(req_hit_way);
else
r.hit_valid <= '0';
-- Send stop marks down regardless of validity
r.hit_smark <= i_in.stop_mark;
" way:" & integer'image(req_hit_way) &
" RA:" & to_hstring(real_addr);
end if;
end if;
end process;
@ -468,10 +586,12 @@ begin
-- We need to read a cache line
if req_is_miss = '1' then
report "cache miss nia:" & to_hstring(i_in.nia) &
" IR:" & std_ulogic'image(i_in.virt_mode) &
" SM:" & std_ulogic'image(i_in.stop_mark) &
" idx:" & integer'image(req_index) &
" way:" & integer'image(replace_way) &
" tag:" & to_hstring(req_tag);
" tag:" & to_hstring(req_tag) &
" RA:" & to_hstring(real_addr);
-- Force misses on that way while reloading that line
cache_valids(req_index)(replace_way) <= '0';
@ -539,6 +659,13 @@ begin
end if;
end case;
end if;
-- TLB miss and protection fault processing
if rst = '1' or flush_in = '1' or m_in.tlbld = '1' then
r.fetch_failed <= '0';
elsif i_in.req = '1' and access_ok = '0' then
r.fetch_failed <= '1';
end if;
end if;
end process;
end;