@ -317,6 +317,7 @@ architecture rtl of dcache is
tlb_hit : std_ulogic;
tlb_hit_way : tlb_way_sig_t;
tlb_hit_index : tlb_index_sig_t;
tlb_victim : tlb_way_sig_t;
-- data buffer for data forwarded from writes to reads
forward_data : std_ulogic_vector(63 downto 0);
@ -342,6 +343,8 @@ architecture rtl of dcache is
acks_pending : unsigned(2 downto 0);
inc_acks : std_ulogic;
dec_acks : std_ulogic;
choose_victim : std_ulogic;
victim_way : way_t;
-- Signals to complete (possibly with error)
ls_valid : std_ulogic;
@ -398,8 +401,7 @@ architecture rtl of dcache is
signal ram_wr_select : std_ulogic_vector(ROW_SIZE - 1 downto 0);
-- PLRU output interface
type plru_out_t is array(0 to NUM_LINES-1) of std_ulogic_vector(WAY_BITS-1 downto 0);
signal plru_victim : plru_out_t;
signal plru_victim : way_t;
signal replace_way : way_t;
-- Wishbone read/write/cache write formatting signals
@ -423,8 +425,7 @@ architecture rtl of dcache is
signal tlb_miss : std_ulogic;
-- TLB PLRU output interface
type tlb_plru_out_t is array(tlb_index_t) of std_ulogic_vector(TLB_WAY_BITS-1 downto 0);
signal tlb_plru_victim : tlb_plru_out_t;
signal tlb_plru_victim : std_ulogic_vector(TLB_WAY_BITS-1 downto 0);
signal snoop_tag_set : cache_tags_set_t;
signal snoop_valid : std_ulogic;
@ -650,39 +651,49 @@ begin
end process;
-- Generate TLB PLRUs
maybe_tlb_plrus: if TLB_NUM_WAYS > 1 generate
maybe_tlb_plrus : if TLB_NUM_WAYS > 1 generate
type tlb_plru_array is array(tlb_index_t) of std_ulogic_vector(TLB_NUM_WAYS - 2 downto 0);
signal tlb_plru_ram : tlb_plru_array;
signal tlb_plru_cur : std_ulogic_vector(TLB_NUM_WAYS - 2 downto 0);
signal tlb_plru_upd : std_ulogic_vector(TLB_NUM_WAYS - 2 downto 0);
signal tlb_plru_acc : std_ulogic_vector(TLB_WAY_BITS-1 downto 0);
signal tlb_plru_out : std_ulogic_vector(TLB_WAY_BITS-1 downto 0);
begin
tlb_plrus: for i in 0 to TLB_SET_SIZE - 1 generate
-- TLB PLRU interface
signal tlb_plru_acc : std_ulogic_vector(TLB_WAY_BITS-1 downto 0);
signal tlb_plru_acc_en : std_ulogic;
signal tlb_plru_out : std_ulogic_vector(TLB_WAY_BITS-1 downto 0);
begin
tlb_plru : entity work.plru
generic map (
BITS => TLB_WAY_BITS
)
port map (
clk => clk,
rst => rst,
acc => tlb_plru_acc,
acc_en => tlb_plru_acc_en,
lru => tlb_plru_out
);
process(all)
begin
-- PLRU interface
if not is_X(r1.tlb_hit_index) and r1.tlb_hit_index = i then
tlb_plru_acc_en <= r1.tlb_hit;
assert not is_X(r1.tlb_hit_way);
else
tlb_plru_acc_en <= '0';
end if;
tlb_plru_acc <= std_ulogic_vector(r1.tlb_hit_way);
tlb_plru_victim(i) <= tlb_plru_out;
end process;
end generate;
tlb_plru : entity work.plrufn
generic map (
BITS => TLB_WAY_BITS
)
port map (
acc => tlb_plru_acc,
tree_in => tlb_plru_cur,
tree_out => tlb_plru_upd,
lru => tlb_plru_out
);
process(all)
begin
-- Read PLRU bits from array
if is_X(r1.tlb_hit_index) then
tlb_plru_cur <= (others => 'X');
else
tlb_plru_cur <= tlb_plru_ram(to_integer(r1.tlb_hit_index));
end if;
-- PLRU interface
tlb_plru_acc <= std_ulogic_vector(r1.tlb_hit_way);
tlb_plru_victim <= tlb_plru_out;
end process;
-- synchronous writes to TLB PLRU array
process(clk)
begin
if rising_edge(clk) then
if r1.tlb_hit = '1' then
assert not is_X(r1.tlb_hit_index) severity failure;
tlb_plru_ram(to_integer(r1.tlb_hit_index)) <= tlb_plru_upd;
end if;
end if;
end process;
end generate;
tlb_search : process(all)
@ -753,7 +764,7 @@ begin
if tlb_hit = '1' then
repl_way := tlb_hit_way;
else
repl_way := unsigned(tlb_plru_victim(to_integer(tlb_req_index)));
repl_way := unsigned(r1.tlb_victim);
end if;
assert not is_X(repl_way);
end if;
@ -770,39 +781,49 @@ begin
end process;
-- Generate PLRUs
maybe_plrus: if NUM_WAYS > 1 generate
maybe_plrus : if NUM_WAYS > 1 generate
type plru_array is array(0 to NUM_LINES-1) of std_ulogic_vector(NUM_WAYS - 2 downto 0);
signal plru_ram : plru_array;
signal plru_cur : std_ulogic_vector(NUM_WAYS - 2 downto 0);
signal plru_upd : std_ulogic_vector(NUM_WAYS - 2 downto 0);
signal plru_acc : std_ulogic_vector(WAY_BITS-1 downto 0);
signal plru_out : std_ulogic_vector(WAY_BITS-1 downto 0);
begin
plrus: for i in 0 to NUM_LINES-1 generate
-- PLRU interface
signal plru_acc : std_ulogic_vector(WAY_BITS-1 downto 0);
signal plru_acc_en : std_ulogic;
signal plru_out : std_ulogic_vector(WAY_BITS-1 downto 0);
begin
plru : entity work.plru
generic map (
BITS => WAY_BITS
)
port map (
clk => clk,
rst => rst,
acc => plru_acc,
acc_en => plru_acc_en,
lru => plru_out
);
process(all)
begin
-- PLRU interface
if not is_X(r1.hit_index) and r1.hit_index = to_unsigned(i, INDEX_BITS) then
plru_acc_en <= r1.cache_hit;
else
plru_acc_en <= '0';
end if;
plru_acc <= std_ulogic_vector(r1.hit_way);
plru_victim(i) <= plru_out;
end process;
end generate;
plru : entity work.plrufn
generic map (
BITS => WAY_BITS
)
port map (
acc => plru_acc,
tree_in => plru_cur,
tree_out => plru_upd,
lru => plru_out
);
process(all)
begin
-- Read PLRU bits from array
if is_X(r1.hit_index) then
plru_cur <= (others => 'X');
else
plru_cur <= plru_ram(to_integer(r1.hit_index));
end if;
-- PLRU interface
plru_acc <= std_ulogic_vector(r1.hit_way);
plru_victim <= unsigned(plru_out);
end process;
-- synchronous writes to PLRU array
process(clk)
begin
if rising_edge(clk) then
if r1.cache_hit = '1' then
assert not is_X(r1.hit_index) severity failure;
plru_ram(to_integer(r1.hit_index)) <= plru_upd;
end if;
end if;
end process;
end generate;
-- Cache tag RAM read port
@ -980,8 +1001,13 @@ begin
replace_way <= to_unsigned(0, WAY_BITS);
if NUM_WAYS > 1 then
if r1.write_tag = '1' then
assert not is_X(r1.store_index);
replace_way <= unsigned(plru_victim(to_integer(r1.store_index)));
if r1.choose_victim = '1' then
replace_way <= plru_victim;
else
-- Cache victim way was chosen earlier,
-- in the cycle after the miss was detected.
replace_way <= r1.victim_way;
end if;
else
replace_way <= r1.store_way;
end if;
@ -1305,8 +1331,6 @@ begin
end if;
-- Fast path for load/store hits. Set signals for the writeback controls.
r1.hit_way <= req_hit_way;
r1.hit_index <= req_index;
if req_op = OP_LOAD_HIT then
r1.hit_load_valid <= '1';
else
@ -1340,6 +1364,11 @@ begin
r1.tlb_hit <= tlb_hit;
r1.tlb_hit_way <= tlb_hit_way;
r1.tlb_hit_index <= tlb_req_index;
-- determine victim way in the TLB in the cycle after
-- we detect the TLB miss
if r1.ls_error = '1' then
r1.tlb_victim <= unsigned(tlb_plru_victim);
end if;
end if;
end process;
@ -1364,6 +1393,7 @@ begin
ev.load_miss <= '0';
ev.store_miss <= '0';
ev.dtlb_miss <= tlb_miss;
r1.choose_victim <= '0';
-- On reset, clear all valid bits to force misses
if rst = '1' then
@ -1460,6 +1490,17 @@ begin
end if;
end if;
-- Signals for PLRU update and victim selection
r1.hit_way <= req_hit_way;
r1.hit_index <= req_index;
-- Record victim way in the cycle after we see a load or dcbz miss
if r1.choose_victim = '1' then
r1.victim_way <= plru_victim;
end if;
if req_op = OP_LOAD_MISS or (req_op = OP_STORE_MISS and r0.req.dcbz = '1') then
r1.choose_victim <= '1';
end if;
-- Main state machine
case r1.state is
when IDLE =>