fetch1/icache: Remove the use_previous logic

This removes logic that I added some time ago with the thought that it
would enable us to do prefetching in the icache.  This logic detects
when the fetch address is an odd multiple of 4 and is the next address
in sequence from the previous cycle.  In that case, the instruction we
want is already in the output register of the icache RAM, so there is
no need to do another read or any icache tag or TLB lookup.
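
For reference, the detection being removed looked roughly like this (a
sketch based on the icache hunk below; assuming the usual 64-bit read
width, INSN_BITS is 1 and the index test reduces to checking nia(2),
i.e. that the address is an odd multiple of 4):

    -- Sketch of the removed check: reuse the RAM output register when
    -- the fetch is sequential, we hit last cycle, and we don't want
    -- the first 32-bit chunk of the row.
    if unsigned(i_in.nia(INSN_BITS + 2 - 1 downto 2)) /= 0 then
        use_previous <= i_in.req and i_in.sequential and r.hit_valid;
    else
        use_previous <= '0';
    end if;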

However, this logic adds complexity, and removing it improves timing,
so take it out.

Signed-off-by: Paul Mackerras <paulus@ozlabs.org>
Paul Mackerras 3 years ago
parent cef3660e74
commit 49ec80ac3e

fetch1.vhdl
@@ -89,9 +89,8 @@ begin
                 r_int.predicted_taken <= r_next_int.predicted_taken;
                 r_int.pred_not_taken <= r_next_int.pred_not_taken;
                 r_int.predicted_nia <= r_next_int.predicted_nia;
-                r_int.rd_is_niap4 <= r_next.sequential;
+                r_int.rd_is_niap4 <= r_next_int.rd_is_niap4;
             end if;
-            r.sequential <= r_next.sequential and advance_nia;
             -- always send the up-to-date stop mark and req
             r.stop_mark <= stop_in;
             r.req <= not rst;
@@ -145,11 +144,11 @@ begin
     begin
         v := r;
         v_int := r_int;
-        v.sequential := '0';
         v.predicted := '0';
         v.pred_ntaken := '0';
         v_int.predicted_taken := '0';
         v_int.pred_not_taken := '0';
+        v_int.rd_is_niap4 := '0';

         if rst = '1' then
             if alt_reset_in = '1' then
@@ -180,7 +179,7 @@ begin
                 v.nia := r_int.predicted_nia;
                 v.predicted := '1';
             else
-                v.sequential := '1';
+                v_int.rd_is_niap4 := '1';
                 v.pred_ntaken := r_int.pred_not_taken;
                 v.nia := std_ulogic_vector(unsigned(r.nia) + 4);
                 if r_int.mode_32bit = '1' then

icache.vhdl
@@ -212,7 +212,6 @@ architecture rtl of icache is
     signal ra_valid : std_ulogic;
     signal priv_fault : std_ulogic;
     signal access_ok : std_ulogic;
-    signal use_previous : std_ulogic;

     -- Cache RAM interface
     type cache_ram_out_t is array(way_t) of cache_row_t;
@@ -397,7 +396,7 @@ begin
                     wr_dat(ii * 8 + 7 downto ii * 8) <= wishbone_in.dat(j * 8 + 7 downto j * 8);
                 end loop;
             end if;
-            do_read <= not (stall_in or use_previous);
+            do_read <= not stall_in;
             do_write <= '0';
             if wishbone_in.ack = '1' and replace_way = i then
                 do_write <= '1';
@@ -503,16 +502,6 @@ begin
         variable is_hit : std_ulogic;
         variable hit_way : way_t;
     begin
-        -- i_in.sequential means that i_in.nia this cycle is 4 more than
-        -- last cycle. If we read more than 32 bits at a time, had a cache hit
-        -- last cycle, and we don't want the first 32-bit chunk, then we can
-        -- keep the data we read last cycle and just use that.
-        if unsigned(i_in.nia(INSN_BITS+2-1 downto 2)) /= 0 then
-            use_previous <= i_in.req and i_in.sequential and r.hit_valid;
-        else
-            use_previous <= '0';
-        end if;
-
         -- Extract line, row and tag from request
         req_index <= get_index(i_in.nia);
         req_row <= get_row(i_in.nia);
@@ -542,7 +531,7 @@ begin
         end loop;

         -- Generate the "hit" and "miss" signals for the synchronous blocks
-        if i_in.req = '1' and access_ok = '1' and flush_in = '0' and rst = '0' and use_previous = '0' then
+        if i_in.req = '1' and access_ok = '1' and flush_in = '0' and rst = '0' then
             req_is_hit <= is_hit;
             req_is_miss <= not is_hit;
         else
@@ -576,7 +565,7 @@ begin
         i_out.next_pred_ntaken <= i_in.pred_ntaken;

         -- Stall fetch1 if we have a miss on cache or TLB or a protection fault
-        stall_out <= not (is_hit and access_ok) and not use_previous;
+        stall_out <= not (is_hit and access_ok);

         -- Wishbone requests output (from the cache miss reload machine)
         wishbone_out <= r.wb;
@@ -588,8 +577,7 @@ begin
         if rising_edge(clk) then
             -- keep outputs to fetch2 unchanged on a stall
             -- except that flush or reset sets valid to 0
-            -- If use_previous, keep the same data as last cycle and use the second half
-            if stall_in = '1' or use_previous = '1' then
+            if stall_in = '1' then
                 if rst = '1' or flush_in = '1' then
                     r.hit_valid <= '0';
                 end if;
