|
|
@ -122,7 +122,7 @@ architecture rtl of dcache is
|
|
|
|
type cache_valids_t is array(index_t) of cache_way_valids_t;
|
|
|
|
type cache_valids_t is array(index_t) of cache_way_valids_t;
|
|
|
|
type row_per_line_valid_t is array(0 to ROW_PER_LINE - 1) of std_ulogic;
|
|
|
|
type row_per_line_valid_t is array(0 to ROW_PER_LINE - 1) of std_ulogic;
|
|
|
|
|
|
|
|
|
|
|
|
-- Storage. Hopefully "cache_rows" is a BRAM, the rest is LUTs
|
|
|
|
-- Storage. Hopefully implemented in LUTs
|
|
|
|
signal cache_tags : cache_tags_array_t;
|
|
|
|
signal cache_tags : cache_tags_array_t;
|
|
|
|
signal cache_tag_set : cache_tags_set_t;
|
|
|
|
signal cache_tag_set : cache_tags_set_t;
|
|
|
|
signal cache_valids : cache_valids_t;
|
|
|
|
signal cache_valids : cache_valids_t;
|
|
|
@ -317,15 +317,13 @@ architecture rtl of dcache is
|
|
|
|
tlb_hit_way : tlb_way_t;
|
|
|
|
tlb_hit_way : tlb_way_t;
|
|
|
|
tlb_hit_index : tlb_index_t;
|
|
|
|
tlb_hit_index : tlb_index_t;
|
|
|
|
|
|
|
|
|
|
|
|
-- 2-stage data buffer for data forwarded from writes to reads
|
|
|
|
-- data buffer for data forwarded from writes to reads
|
|
|
|
forward_data1 : std_ulogic_vector(63 downto 0);
|
|
|
|
forward_data : std_ulogic_vector(63 downto 0);
|
|
|
|
forward_data2 : std_ulogic_vector(63 downto 0);
|
|
|
|
forward_tag : cache_tag_t;
|
|
|
|
forward_sel1 : std_ulogic_vector(7 downto 0);
|
|
|
|
|
|
|
|
forward_valid1 : std_ulogic;
|
|
|
|
|
|
|
|
forward_way1 : way_t;
|
|
|
|
|
|
|
|
forward_row1 : row_t;
|
|
|
|
|
|
|
|
use_forward1 : std_ulogic;
|
|
|
|
|
|
|
|
forward_sel : std_ulogic_vector(7 downto 0);
|
|
|
|
forward_sel : std_ulogic_vector(7 downto 0);
|
|
|
|
|
|
|
|
forward_valid : std_ulogic;
|
|
|
|
|
|
|
|
forward_row : row_t;
|
|
|
|
|
|
|
|
data_out : std_ulogic_vector(63 downto 0);
|
|
|
|
|
|
|
|
|
|
|
|
-- Cache miss state (reload state machine)
|
|
|
|
-- Cache miss state (reload state machine)
|
|
|
|
state : state_t;
|
|
|
|
state : state_t;
|
|
|
@ -387,12 +385,16 @@ architecture rtl of dcache is
|
|
|
|
signal r0_valid : std_ulogic;
|
|
|
|
signal r0_valid : std_ulogic;
|
|
|
|
signal r0_stall : std_ulogic;
|
|
|
|
signal r0_stall : std_ulogic;
|
|
|
|
|
|
|
|
|
|
|
|
signal use_forward1_next : std_ulogic;
|
|
|
|
signal fwd_same_tag : std_ulogic;
|
|
|
|
signal use_forward2_next : std_ulogic;
|
|
|
|
signal use_forward_st : std_ulogic;
|
|
|
|
|
|
|
|
signal use_forward_rl : std_ulogic;
|
|
|
|
|
|
|
|
signal use_forward2 : std_ulogic;
|
|
|
|
|
|
|
|
|
|
|
|
-- Cache RAM interface
|
|
|
|
-- Cache RAM interface
|
|
|
|
type cache_ram_out_t is array(way_t) of cache_row_t;
|
|
|
|
type cache_ram_out_t is array(way_t) of cache_row_t;
|
|
|
|
signal cache_out : cache_ram_out_t;
|
|
|
|
signal cache_out : cache_ram_out_t;
|
|
|
|
|
|
|
|
signal ram_wr_data : cache_row_t;
|
|
|
|
|
|
|
|
signal ram_wr_select : std_ulogic_vector(ROW_SIZE - 1 downto 0);
|
|
|
|
|
|
|
|
|
|
|
|
-- PLRU output interface
|
|
|
|
-- PLRU output interface
|
|
|
|
type plru_out_t is array(index_t) of std_ulogic_vector(WAY_BITS-1 downto 0);
|
|
|
|
type plru_out_t is array(index_t) of std_ulogic_vector(WAY_BITS-1 downto 0);
|
|
|
@ -576,6 +578,7 @@ begin
|
|
|
|
r.doall := m_in.doall;
|
|
|
|
r.doall := m_in.doall;
|
|
|
|
r.tlbld := m_in.tlbld;
|
|
|
|
r.tlbld := m_in.tlbld;
|
|
|
|
r.mmu_req := '1';
|
|
|
|
r.mmu_req := '1';
|
|
|
|
|
|
|
|
r.d_valid := '1';
|
|
|
|
else
|
|
|
|
else
|
|
|
|
r.req := d_in;
|
|
|
|
r.req := d_in;
|
|
|
|
r.req.data := (others => '0');
|
|
|
|
r.req.data := (others => '0');
|
|
|
@ -583,21 +586,19 @@ begin
|
|
|
|
r.doall := '0';
|
|
|
|
r.doall := '0';
|
|
|
|
r.tlbld := '0';
|
|
|
|
r.tlbld := '0';
|
|
|
|
r.mmu_req := '0';
|
|
|
|
r.mmu_req := '0';
|
|
|
|
|
|
|
|
r.d_valid := '0';
|
|
|
|
end if;
|
|
|
|
end if;
|
|
|
|
r.d_valid := '0';
|
|
|
|
|
|
|
|
if rst = '1' then
|
|
|
|
if rst = '1' then
|
|
|
|
r0_full <= '0';
|
|
|
|
r0_full <= '0';
|
|
|
|
elsif (r1.full = '0' and d_in.hold = '0') or r0_full = '0' then
|
|
|
|
elsif (r1.full = '0' and d_in.hold = '0') or r0_full = '0' then
|
|
|
|
r0 <= r;
|
|
|
|
r0 <= r;
|
|
|
|
r0_full <= r.req.valid;
|
|
|
|
r0_full <= r.req.valid;
|
|
|
|
end if;
|
|
|
|
elsif r0.d_valid = '0' then
|
|
|
|
-- Sample data the cycle after a request comes in from loadstore1.
|
|
|
|
-- Sample data the cycle after a request comes in from loadstore1.
|
|
|
|
-- If another request has come in already then the data will get
|
|
|
|
-- If this request is already moving into r1 then the data will get
|
|
|
|
-- put directly into req.data below.
|
|
|
|
-- put directly into req.data in the dcache_slow process below.
|
|
|
|
if r0.req.valid = '1' and r.req.valid = '0' and r0.d_valid = '0' and
|
|
|
|
|
|
|
|
r0.mmu_req = '0' then
|
|
|
|
|
|
|
|
r0.req.data <= d_in.data;
|
|
|
|
r0.req.data <= d_in.data;
|
|
|
|
r0.d_valid <= '1';
|
|
|
|
r0.d_valid <= r0.req.valid;
|
|
|
|
end if;
|
|
|
|
end if;
|
|
|
|
end if;
|
|
|
|
end if;
|
|
|
|
end process;
|
|
|
|
end process;
|
|
|
@ -834,6 +835,8 @@ begin
|
|
|
|
variable hit_way_set : hit_way_set_t;
|
|
|
|
variable hit_way_set : hit_way_set_t;
|
|
|
|
variable rel_matches : std_ulogic_vector(TLB_NUM_WAYS - 1 downto 0);
|
|
|
|
variable rel_matches : std_ulogic_vector(TLB_NUM_WAYS - 1 downto 0);
|
|
|
|
variable rel_match : std_ulogic;
|
|
|
|
variable rel_match : std_ulogic;
|
|
|
|
|
|
|
|
variable fwd_matches : std_ulogic_vector(TLB_NUM_WAYS - 1 downto 0);
|
|
|
|
|
|
|
|
variable fwd_match : std_ulogic;
|
|
|
|
begin
|
|
|
|
begin
|
|
|
|
-- Extract line, row and tag from request
|
|
|
|
-- Extract line, row and tag from request
|
|
|
|
req_index <= get_index(r0.req.addr);
|
|
|
|
req_index <= get_index(r0.req.addr);
|
|
|
@ -849,8 +852,10 @@ begin
|
|
|
|
hit_way := 0;
|
|
|
|
hit_way := 0;
|
|
|
|
is_hit := '0';
|
|
|
|
is_hit := '0';
|
|
|
|
rel_match := '0';
|
|
|
|
rel_match := '0';
|
|
|
|
|
|
|
|
fwd_match := '0';
|
|
|
|
if r0.req.virt_mode = '1' then
|
|
|
|
if r0.req.virt_mode = '1' then
|
|
|
|
rel_matches := (others => '0');
|
|
|
|
rel_matches := (others => '0');
|
|
|
|
|
|
|
|
fwd_matches := (others => '0');
|
|
|
|
for j in tlb_way_t loop
|
|
|
|
for j in tlb_way_t loop
|
|
|
|
hit_way_set(j) := 0;
|
|
|
|
hit_way_set(j) := 0;
|
|
|
|
s_hit := '0';
|
|
|
|
s_hit := '0';
|
|
|
@ -870,11 +875,15 @@ begin
|
|
|
|
if s_tag = r1.reload_tag then
|
|
|
|
if s_tag = r1.reload_tag then
|
|
|
|
rel_matches(j) := '1';
|
|
|
|
rel_matches(j) := '1';
|
|
|
|
end if;
|
|
|
|
end if;
|
|
|
|
|
|
|
|
if s_tag = r1.forward_tag then
|
|
|
|
|
|
|
|
fwd_matches(j) := '1';
|
|
|
|
|
|
|
|
end if;
|
|
|
|
end loop;
|
|
|
|
end loop;
|
|
|
|
if tlb_hit = '1' then
|
|
|
|
if tlb_hit = '1' then
|
|
|
|
is_hit := hit_set(tlb_hit_way);
|
|
|
|
is_hit := hit_set(tlb_hit_way);
|
|
|
|
hit_way := hit_way_set(tlb_hit_way);
|
|
|
|
hit_way := hit_way_set(tlb_hit_way);
|
|
|
|
rel_match := rel_matches(tlb_hit_way);
|
|
|
|
rel_match := rel_matches(tlb_hit_way);
|
|
|
|
|
|
|
|
fwd_match := fwd_matches(tlb_hit_way);
|
|
|
|
end if;
|
|
|
|
end if;
|
|
|
|
else
|
|
|
|
else
|
|
|
|
s_tag := get_tag(r0.req.addr);
|
|
|
|
s_tag := get_tag(r0.req.addr);
|
|
|
@ -888,39 +897,28 @@ begin
|
|
|
|
if s_tag = r1.reload_tag then
|
|
|
|
if s_tag = r1.reload_tag then
|
|
|
|
rel_match := '1';
|
|
|
|
rel_match := '1';
|
|
|
|
end if;
|
|
|
|
end if;
|
|
|
|
|
|
|
|
if s_tag = r1.forward_tag then
|
|
|
|
|
|
|
|
fwd_match := '1';
|
|
|
|
|
|
|
|
end if;
|
|
|
|
end if;
|
|
|
|
end if;
|
|
|
|
req_same_tag <= rel_match;
|
|
|
|
req_same_tag <= rel_match;
|
|
|
|
|
|
|
|
fwd_same_tag <= fwd_match;
|
|
|
|
-- See if the request matches the line currently being reloaded
|
|
|
|
|
|
|
|
if r1.state = RELOAD_WAIT_ACK and req_index = r1.store_index and
|
|
|
|
|
|
|
|
rel_match = '1' then
|
|
|
|
|
|
|
|
-- For a store, consider this a hit even if the row isn't valid
|
|
|
|
|
|
|
|
-- since it will be by the time we perform the store.
|
|
|
|
|
|
|
|
-- For a load, check the appropriate row valid bit.
|
|
|
|
|
|
|
|
is_hit := not r0.req.load or r1.rows_valid(req_row mod ROW_PER_LINE);
|
|
|
|
|
|
|
|
hit_way := replace_way;
|
|
|
|
|
|
|
|
end if;
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
-- Whether to use forwarded data for a load or not
|
|
|
|
-- Whether to use forwarded data for a load or not
|
|
|
|
use_forward1_next <= '0';
|
|
|
|
use_forward_st <= '0';
|
|
|
|
if get_row(r1.req.real_addr) = req_row and r1.req.hit_way = hit_way then
|
|
|
|
use_forward_rl <= '0';
|
|
|
|
-- Only need to consider r1.write_bram here, since if we are
|
|
|
|
if r1.store_row = req_row and rel_match = '1' then
|
|
|
|
-- writing refill data here, then we don't have a cache hit this
|
|
|
|
-- Use the forwarding path if this cycle is a write to this row
|
|
|
|
-- cycle on the line being refilled. (There is the possibility
|
|
|
|
use_forward_st <= r1.write_bram;
|
|
|
|
-- that the load following the load miss that started the refill
|
|
|
|
if r1.state = RELOAD_WAIT_ACK and wishbone_in.ack = '1' then
|
|
|
|
-- could be to the old contents of the victim line, since it is a
|
|
|
|
use_forward_rl <= '1';
|
|
|
|
-- couple of cycles after the refill starts before we see the
|
|
|
|
end if;
|
|
|
|
-- updated cache tag. In that case we don't use the bypass.)
|
|
|
|
|
|
|
|
use_forward1_next <= r1.write_bram;
|
|
|
|
|
|
|
|
end if;
|
|
|
|
end if;
|
|
|
|
use_forward2_next <= '0';
|
|
|
|
use_forward2 <= '0';
|
|
|
|
if r1.forward_row1 = req_row and r1.forward_way1 = hit_way then
|
|
|
|
if r1.forward_row = req_row and fwd_match = '1' then
|
|
|
|
use_forward2_next <= r1.forward_valid1;
|
|
|
|
use_forward2 <= r1.forward_valid;
|
|
|
|
end if;
|
|
|
|
end if;
|
|
|
|
|
|
|
|
|
|
|
|
-- The way that matched on a hit
|
|
|
|
|
|
|
|
req_hit_way <= hit_way;
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
-- The way to replace on a miss
|
|
|
|
-- The way to replace on a miss
|
|
|
|
if r1.write_tag = '1' then
|
|
|
|
if r1.write_tag = '1' then
|
|
|
|
replace_way <= to_integer(unsigned(plru_victim(r1.store_index)));
|
|
|
|
replace_way <= to_integer(unsigned(plru_victim(r1.store_index)));
|
|
|
@ -928,6 +926,23 @@ begin
|
|
|
|
replace_way <= r1.store_way;
|
|
|
|
replace_way <= r1.store_way;
|
|
|
|
end if;
|
|
|
|
end if;
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
-- See if the request matches the line currently being reloaded
|
|
|
|
|
|
|
|
if r1.state = RELOAD_WAIT_ACK and req_index = r1.store_index and
|
|
|
|
|
|
|
|
rel_match = '1' then
|
|
|
|
|
|
|
|
-- Ignore is_hit from above, because a load miss writes the new tag
|
|
|
|
|
|
|
|
-- but doesn't clear the valid bit on the line before refilling it.
|
|
|
|
|
|
|
|
-- For a store, consider this a hit even if the row isn't valid
|
|
|
|
|
|
|
|
-- since it will be by the time we perform the store.
|
|
|
|
|
|
|
|
-- For a load, check the appropriate row valid bit; but also,
|
|
|
|
|
|
|
|
-- if use_forward_rl is 1 then we can consider this a hit.
|
|
|
|
|
|
|
|
is_hit := not r0.req.load or r1.rows_valid(req_row mod ROW_PER_LINE) or
|
|
|
|
|
|
|
|
use_forward_rl;
|
|
|
|
|
|
|
|
hit_way := replace_way;
|
|
|
|
|
|
|
|
end if;
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
-- The way that matched on a hit
|
|
|
|
|
|
|
|
req_hit_way <= hit_way;
|
|
|
|
|
|
|
|
|
|
|
|
-- work out whether we have permission for this access
|
|
|
|
-- work out whether we have permission for this access
|
|
|
|
-- NB we don't yet implement AMR, thus no KUAP
|
|
|
|
-- NB we don't yet implement AMR, thus no KUAP
|
|
|
|
rc_ok <= perm_attr.reference and (r0.req.load or perm_attr.changed);
|
|
|
|
rc_ok <= perm_attr.reference and (r0.req.load or perm_attr.changed);
|
|
|
@ -1023,28 +1038,9 @@ begin
|
|
|
|
-- Return data for loads & completion control logic
|
|
|
|
-- Return data for loads & completion control logic
|
|
|
|
--
|
|
|
|
--
|
|
|
|
writeback_control: process(all)
|
|
|
|
writeback_control: process(all)
|
|
|
|
variable data_out : std_ulogic_vector(63 downto 0);
|
|
|
|
|
|
|
|
variable data_fwd : std_ulogic_vector(63 downto 0);
|
|
|
|
|
|
|
|
variable j : integer;
|
|
|
|
|
|
|
|
begin
|
|
|
|
begin
|
|
|
|
-- Use the bypass if are reading the row that was written 1 or 2 cycles
|
|
|
|
|
|
|
|
-- ago, including for the slow_valid = 1 case (i.e. completing a load
|
|
|
|
|
|
|
|
-- miss or a non-cacheable load).
|
|
|
|
|
|
|
|
if r1.use_forward1 = '1' then
|
|
|
|
|
|
|
|
data_fwd := r1.forward_data1;
|
|
|
|
|
|
|
|
else
|
|
|
|
|
|
|
|
data_fwd := r1.forward_data2;
|
|
|
|
|
|
|
|
end if;
|
|
|
|
|
|
|
|
data_out := cache_out(r1.hit_way);
|
|
|
|
|
|
|
|
for i in 0 to 7 loop
|
|
|
|
|
|
|
|
j := i * 8;
|
|
|
|
|
|
|
|
if r1.forward_sel(i) = '1' then
|
|
|
|
|
|
|
|
data_out(j + 7 downto j) := data_fwd(j + 7 downto j);
|
|
|
|
|
|
|
|
end if;
|
|
|
|
|
|
|
|
end loop;
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
d_out.valid <= r1.ls_valid;
|
|
|
|
d_out.valid <= r1.ls_valid;
|
|
|
|
d_out.data <= data_out;
|
|
|
|
d_out.data <= r1.data_out;
|
|
|
|
d_out.store_done <= not r1.stcx_fail;
|
|
|
|
d_out.store_done <= not r1.stcx_fail;
|
|
|
|
d_out.error <= r1.ls_error;
|
|
|
|
d_out.error <= r1.ls_error;
|
|
|
|
d_out.cache_paradox <= r1.cache_paradox;
|
|
|
|
d_out.cache_paradox <= r1.cache_paradox;
|
|
|
@ -1052,7 +1048,7 @@ begin
|
|
|
|
-- Outputs to MMU
|
|
|
|
-- Outputs to MMU
|
|
|
|
m_out.done <= r1.mmu_done;
|
|
|
|
m_out.done <= r1.mmu_done;
|
|
|
|
m_out.err <= r1.mmu_error;
|
|
|
|
m_out.err <= r1.mmu_error;
|
|
|
|
m_out.data <= data_out;
|
|
|
|
m_out.data <= r1.data_out;
|
|
|
|
|
|
|
|
|
|
|
|
-- We have a valid load or store hit or we just completed a slow
|
|
|
|
-- We have a valid load or store hit or we just completed a slow
|
|
|
|
-- op such as a load miss, a NC load or a store
|
|
|
|
-- op such as a load miss, a NC load or a store
|
|
|
@ -1076,7 +1072,7 @@ begin
|
|
|
|
-- Request came from loadstore1...
|
|
|
|
-- Request came from loadstore1...
|
|
|
|
-- Load hit case is the standard path
|
|
|
|
-- Load hit case is the standard path
|
|
|
|
if r1.hit_load_valid = '1' then
|
|
|
|
if r1.hit_load_valid = '1' then
|
|
|
|
report "completing load hit data=" & to_hstring(data_out);
|
|
|
|
report "completing load hit data=" & to_hstring(r1.data_out);
|
|
|
|
end if;
|
|
|
|
end if;
|
|
|
|
|
|
|
|
|
|
|
|
-- error cases complete without stalling
|
|
|
|
-- error cases complete without stalling
|
|
|
@ -1086,7 +1082,7 @@ begin
|
|
|
|
|
|
|
|
|
|
|
|
-- Slow ops (load miss, NC, stores)
|
|
|
|
-- Slow ops (load miss, NC, stores)
|
|
|
|
if r1.slow_valid = '1' then
|
|
|
|
if r1.slow_valid = '1' then
|
|
|
|
report "completing store or load miss data=" & to_hstring(data_out);
|
|
|
|
report "completing store or load miss data=" & to_hstring(r1.data_out);
|
|
|
|
end if;
|
|
|
|
end if;
|
|
|
|
|
|
|
|
|
|
|
|
else
|
|
|
|
else
|
|
|
@ -1108,6 +1104,13 @@ begin
|
|
|
|
|
|
|
|
|
|
|
|
end process;
|
|
|
|
end process;
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
-- RAM write data and select multiplexers
|
|
|
|
|
|
|
|
ram_wr_data <= r1.req.data when r1.write_bram = '1' else
|
|
|
|
|
|
|
|
wishbone_in.dat when r1.dcbz = '0' else
|
|
|
|
|
|
|
|
(others => '0');
|
|
|
|
|
|
|
|
ram_wr_select <= r1.req.byte_sel when r1.write_bram = '1' else
|
|
|
|
|
|
|
|
(others => '1');
|
|
|
|
|
|
|
|
|
|
|
|
--
|
|
|
|
--
|
|
|
|
-- Generate a cache RAM for each way. This handles the normal
|
|
|
|
-- Generate a cache RAM for each way. This handles the normal
|
|
|
|
-- reads, writes from reloads and the special store-hit update
|
|
|
|
-- reads, writes from reloads and the special store-hit update
|
|
|
@ -1132,7 +1135,7 @@ begin
|
|
|
|
generic map (
|
|
|
|
generic map (
|
|
|
|
ROW_BITS => ROW_BITS,
|
|
|
|
ROW_BITS => ROW_BITS,
|
|
|
|
WIDTH => wishbone_data_bits,
|
|
|
|
WIDTH => wishbone_data_bits,
|
|
|
|
ADD_BUF => true
|
|
|
|
ADD_BUF => false
|
|
|
|
)
|
|
|
|
)
|
|
|
|
port map (
|
|
|
|
port map (
|
|
|
|
clk => clk,
|
|
|
|
clk => clk,
|
|
|
@ -1141,7 +1144,7 @@ begin
|
|
|
|
rd_data => dout,
|
|
|
|
rd_data => dout,
|
|
|
|
wr_sel => wr_sel_m,
|
|
|
|
wr_sel => wr_sel_m,
|
|
|
|
wr_addr => wr_addr,
|
|
|
|
wr_addr => wr_addr,
|
|
|
|
wr_data => wr_data
|
|
|
|
wr_data => ram_wr_data
|
|
|
|
);
|
|
|
|
);
|
|
|
|
process(all)
|
|
|
|
process(all)
|
|
|
|
begin
|
|
|
|
begin
|
|
|
@ -1157,37 +1160,13 @@ begin
|
|
|
|
-- For timing, the mux on wr_data/sel/addr is not dependent on anything
|
|
|
|
-- For timing, the mux on wr_data/sel/addr is not dependent on anything
|
|
|
|
-- other than the current state.
|
|
|
|
-- other than the current state.
|
|
|
|
--
|
|
|
|
--
|
|
|
|
wr_sel_m <= (others => '0');
|
|
|
|
wr_addr <= std_ulogic_vector(to_unsigned(r1.store_row, ROW_BITS));
|
|
|
|
|
|
|
|
|
|
|
|
do_write <= '0';
|
|
|
|
wr_sel_m <= (others => '0');
|
|
|
|
if r1.write_bram = '1' then
|
|
|
|
if i = replace_way and
|
|
|
|
-- Write store data to BRAM. This happens one cycle after the
|
|
|
|
(r1.write_bram = '1' or
|
|
|
|
-- store is in r0.
|
|
|
|
(r1.state = RELOAD_WAIT_ACK and wishbone_in.ack = '1')) then
|
|
|
|
wr_data <= r1.req.data;
|
|
|
|
wr_sel_m <= ram_wr_select;
|
|
|
|
wr_sel <= r1.req.byte_sel;
|
|
|
|
|
|
|
|
wr_addr <= std_ulogic_vector(to_unsigned(get_row(r1.req.real_addr), ROW_BITS));
|
|
|
|
|
|
|
|
if i = r1.req.hit_way then
|
|
|
|
|
|
|
|
do_write <= '1';
|
|
|
|
|
|
|
|
end if;
|
|
|
|
|
|
|
|
else
|
|
|
|
|
|
|
|
-- Otherwise, we might be doing a reload or a DCBZ
|
|
|
|
|
|
|
|
if r1.dcbz = '1' then
|
|
|
|
|
|
|
|
wr_data <= (others => '0');
|
|
|
|
|
|
|
|
else
|
|
|
|
|
|
|
|
wr_data <= wishbone_in.dat;
|
|
|
|
|
|
|
|
end if;
|
|
|
|
|
|
|
|
wr_addr <= std_ulogic_vector(to_unsigned(r1.store_row, ROW_BITS));
|
|
|
|
|
|
|
|
wr_sel <= (others => '1');
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
if r1.state = RELOAD_WAIT_ACK and wishbone_in.ack = '1' and replace_way = i then
|
|
|
|
|
|
|
|
do_write <= '1';
|
|
|
|
|
|
|
|
end if;
|
|
|
|
|
|
|
|
end if;
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
-- Mask write selects with do_write since BRAM doesn't
|
|
|
|
|
|
|
|
-- have a global write-enable
|
|
|
|
|
|
|
|
if do_write = '1' then
|
|
|
|
|
|
|
|
wr_sel_m <= wr_sel;
|
|
|
|
|
|
|
|
end if;
|
|
|
|
end if;
|
|
|
|
|
|
|
|
|
|
|
|
end process;
|
|
|
|
end process;
|
|
|
@ -1198,20 +1177,60 @@ begin
|
|
|
|
-- It also handles error cases (TLB miss, cache paradox)
|
|
|
|
-- It also handles error cases (TLB miss, cache paradox)
|
|
|
|
--
|
|
|
|
--
|
|
|
|
dcache_fast_hit : process(clk)
|
|
|
|
dcache_fast_hit : process(clk)
|
|
|
|
|
|
|
|
variable j : integer;
|
|
|
|
|
|
|
|
variable sel : std_ulogic_vector(1 downto 0);
|
|
|
|
|
|
|
|
variable data_out : std_ulogic_vector(63 downto 0);
|
|
|
|
begin
|
|
|
|
begin
|
|
|
|
if rising_edge(clk) then
|
|
|
|
if rising_edge(clk) then
|
|
|
|
if req_op /= OP_NONE then
|
|
|
|
if req_op /= OP_NONE then
|
|
|
|
report "op:" & op_t'image(req_op) &
|
|
|
|
report "op:" & op_t'image(req_op) &
|
|
|
|
" addr:" & to_hstring(r0.req.addr) &
|
|
|
|
" addr:" & to_hstring(r0.req.addr) &
|
|
|
|
" nc:" & std_ulogic'image(r0.req.nc) &
|
|
|
|
" nc:" & std_ulogic'image(r0.req.nc) &
|
|
|
|
" idx:" & integer'image(req_index) &
|
|
|
|
" idx:" & integer'image(req_index) &
|
|
|
|
" tag:" & to_hstring(req_tag) &
|
|
|
|
" tag:" & to_hstring(req_tag) &
|
|
|
|
" way: " & integer'image(req_hit_way);
|
|
|
|
" way: " & integer'image(req_hit_way);
|
|
|
|
end if;
|
|
|
|
end if;
|
|
|
|
if r0_valid = '1' then
|
|
|
|
if r0_valid = '1' then
|
|
|
|
r1.mmu_req <= r0.mmu_req;
|
|
|
|
r1.mmu_req <= r0.mmu_req;
|
|
|
|
end if;
|
|
|
|
end if;
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
-- Bypass/forwarding multiplexer for load data.
|
|
|
|
|
|
|
|
-- Use the bypass if are reading the row of BRAM that was written 0 or 1
|
|
|
|
|
|
|
|
-- cycles ago, including for the slow_valid = 1 cases (i.e. completing a
|
|
|
|
|
|
|
|
-- load miss or a non-cacheable load), which are handled via the r1.full case.
|
|
|
|
|
|
|
|
for i in 0 to 7 loop
|
|
|
|
|
|
|
|
if r1.full = '1' or use_forward_rl = '1' then
|
|
|
|
|
|
|
|
sel := '0' & r1.dcbz;
|
|
|
|
|
|
|
|
elsif use_forward_st = '1' and r1.req.byte_sel(i) = '1' then
|
|
|
|
|
|
|
|
sel := "01";
|
|
|
|
|
|
|
|
elsif use_forward2 = '1' and r1.forward_sel(i) = '1' then
|
|
|
|
|
|
|
|
sel := "10";
|
|
|
|
|
|
|
|
else
|
|
|
|
|
|
|
|
sel := "11";
|
|
|
|
|
|
|
|
end if;
|
|
|
|
|
|
|
|
j := i * 8;
|
|
|
|
|
|
|
|
case sel is
|
|
|
|
|
|
|
|
when "00" =>
|
|
|
|
|
|
|
|
data_out(j + 7 downto j) := wishbone_in.dat(j + 7 downto j);
|
|
|
|
|
|
|
|
when "01" =>
|
|
|
|
|
|
|
|
data_out(j + 7 downto j) := r1.req.data(j + 7 downto j);
|
|
|
|
|
|
|
|
when "10" =>
|
|
|
|
|
|
|
|
data_out(j + 7 downto j) := r1.forward_data(j + 7 downto j);
|
|
|
|
|
|
|
|
when others =>
|
|
|
|
|
|
|
|
data_out(j + 7 downto j) := cache_out(req_hit_way)(j + 7 downto j);
|
|
|
|
|
|
|
|
end case;
|
|
|
|
|
|
|
|
end loop;
|
|
|
|
|
|
|
|
r1.data_out <= data_out;
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
r1.forward_data <= ram_wr_data;
|
|
|
|
|
|
|
|
r1.forward_tag <= r1.reload_tag;
|
|
|
|
|
|
|
|
r1.forward_row <= r1.store_row;
|
|
|
|
|
|
|
|
r1.forward_sel <= ram_wr_select;
|
|
|
|
|
|
|
|
r1.forward_valid <= r1.write_bram;
|
|
|
|
|
|
|
|
if r1.state = RELOAD_WAIT_ACK and wishbone_in.ack = '1' then
|
|
|
|
|
|
|
|
r1.forward_valid <= '1';
|
|
|
|
|
|
|
|
end if;
|
|
|
|
|
|
|
|
|
|
|
|
-- Fast path for load/store hits. Set signals for the writeback controls.
|
|
|
|
-- Fast path for load/store hits. Set signals for the writeback controls.
|
|
|
|
r1.hit_way <= req_hit_way;
|
|
|
|
r1.hit_way <= req_hit_way;
|
|
|
|
r1.hit_index <= req_index;
|
|
|
|
r1.hit_index <= req_index;
|
|
|
@ -1263,38 +1282,11 @@ begin
|
|
|
|
-- operates at stage 1.
|
|
|
|
-- operates at stage 1.
|
|
|
|
--
|
|
|
|
--
|
|
|
|
dcache_slow : process(clk)
|
|
|
|
dcache_slow : process(clk)
|
|
|
|
variable stbs_done : boolean;
|
|
|
|
variable stbs_done : boolean;
|
|
|
|
variable req : mem_access_request_t;
|
|
|
|
variable req : mem_access_request_t;
|
|
|
|
variable acks : unsigned(2 downto 0);
|
|
|
|
variable acks : unsigned(2 downto 0);
|
|
|
|
begin
|
|
|
|
begin
|
|
|
|
if rising_edge(clk) then
|
|
|
|
if rising_edge(clk) then
|
|
|
|
r1.use_forward1 <= use_forward1_next;
|
|
|
|
|
|
|
|
r1.forward_sel <= (others => '0');
|
|
|
|
|
|
|
|
if use_forward1_next = '1' then
|
|
|
|
|
|
|
|
r1.forward_sel <= r1.req.byte_sel;
|
|
|
|
|
|
|
|
elsif use_forward2_next = '1' then
|
|
|
|
|
|
|
|
r1.forward_sel <= r1.forward_sel1;
|
|
|
|
|
|
|
|
end if;
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
r1.forward_data2 <= r1.forward_data1;
|
|
|
|
|
|
|
|
if r1.write_bram = '1' then
|
|
|
|
|
|
|
|
r1.forward_data1 <= r1.req.data;
|
|
|
|
|
|
|
|
r1.forward_sel1 <= r1.req.byte_sel;
|
|
|
|
|
|
|
|
r1.forward_way1 <= r1.req.hit_way;
|
|
|
|
|
|
|
|
r1.forward_row1 <= get_row(r1.req.real_addr);
|
|
|
|
|
|
|
|
r1.forward_valid1 <= '1';
|
|
|
|
|
|
|
|
else
|
|
|
|
|
|
|
|
if r1.dcbz = '1' then
|
|
|
|
|
|
|
|
r1.forward_data1 <= (others => '0');
|
|
|
|
|
|
|
|
else
|
|
|
|
|
|
|
|
r1.forward_data1 <= wishbone_in.dat;
|
|
|
|
|
|
|
|
end if;
|
|
|
|
|
|
|
|
r1.forward_sel1 <= (others => '1');
|
|
|
|
|
|
|
|
r1.forward_way1 <= replace_way;
|
|
|
|
|
|
|
|
r1.forward_row1 <= r1.store_row;
|
|
|
|
|
|
|
|
r1.forward_valid1 <= '0';
|
|
|
|
|
|
|
|
end if;
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
ev.dcache_refill <= '0';
|
|
|
|
ev.dcache_refill <= '0';
|
|
|
|
ev.load_miss <= '0';
|
|
|
|
ev.load_miss <= '0';
|
|
|
|
ev.store_miss <= '0';
|
|
|
|
ev.store_miss <= '0';
|
|
|
@ -1488,17 +1480,17 @@ begin
|
|
|
|
end if;
|
|
|
|
end if;
|
|
|
|
|
|
|
|
|
|
|
|
-- Incoming acks processing
|
|
|
|
-- Incoming acks processing
|
|
|
|
r1.forward_valid1 <= wishbone_in.ack;
|
|
|
|
|
|
|
|
if wishbone_in.ack = '1' then
|
|
|
|
if wishbone_in.ack = '1' then
|
|
|
|
r1.rows_valid(r1.store_row mod ROW_PER_LINE) <= '1';
|
|
|
|
r1.rows_valid(r1.store_row mod ROW_PER_LINE) <= '1';
|
|
|
|
-- If this is the data we were looking for, we can
|
|
|
|
-- If this is the data we were looking for, we can
|
|
|
|
-- complete the request next cycle.
|
|
|
|
-- complete the request next cycle.
|
|
|
|
-- Compare the whole address in case the request in
|
|
|
|
-- Compare the whole address in case the request in
|
|
|
|
-- r1.req is not the one that started this refill.
|
|
|
|
-- r1.req is not the one that started this refill.
|
|
|
|
if req.valid = '1' and req.same_tag = '1' and
|
|
|
|
-- (Cases where req comes from r0 are handled as a load
|
|
|
|
((r1.dcbz = '1' and req.dcbz = '1') or
|
|
|
|
-- hit.)
|
|
|
|
(r1.dcbz = '0' and req.op = OP_LOAD_MISS)) and
|
|
|
|
if r1.full = '1' and r1.req.same_tag = '1' and
|
|
|
|
r1.store_row = get_row(req.real_addr) then
|
|
|
|
((r1.dcbz = '1' and req.dcbz = '1') or r1.req.op = OP_LOAD_MISS) and
|
|
|
|
|
|
|
|
r1.store_row = get_row(r1.req.real_addr) then
|
|
|
|
r1.full <= '0';
|
|
|
|
r1.full <= '0';
|
|
|
|
r1.slow_valid <= '1';
|
|
|
|
r1.slow_valid <= '1';
|
|
|
|
if r1.mmu_req = '0' then
|
|
|
|
if r1.mmu_req = '0' then
|
|
|
@ -1506,8 +1498,6 @@ begin
|
|
|
|
else
|
|
|
|
else
|
|
|
|
r1.mmu_done <= '1';
|
|
|
|
r1.mmu_done <= '1';
|
|
|
|
end if;
|
|
|
|
end if;
|
|
|
|
r1.forward_sel <= (others => '1');
|
|
|
|
|
|
|
|
r1.use_forward1 <= '1';
|
|
|
|
|
|
|
|
end if;
|
|
|
|
end if;
|
|
|
|
|
|
|
|
|
|
|
|
-- Check for completion
|
|
|
|
-- Check for completion
|
|
|
@ -1551,6 +1541,8 @@ begin
|
|
|
|
(req.op = OP_STORE_MISS or req.op = OP_STORE_HIT) then
|
|
|
|
(req.op = OP_STORE_MISS or req.op = OP_STORE_HIT) then
|
|
|
|
r1.wb.stb <= '1';
|
|
|
|
r1.wb.stb <= '1';
|
|
|
|
stbs_done := false;
|
|
|
|
stbs_done := false;
|
|
|
|
|
|
|
|
r1.store_way <= req.hit_way;
|
|
|
|
|
|
|
|
r1.store_row <= get_row(req.real_addr);
|
|
|
|
if req.op = OP_STORE_HIT then
|
|
|
|
if req.op = OP_STORE_HIT then
|
|
|
|
r1.write_bram <= '1';
|
|
|
|
r1.write_bram <= '1';
|
|
|
|
end if;
|
|
|
|
end if;
|
|
|
@ -1592,8 +1584,6 @@ begin
|
|
|
|
else
|
|
|
|
else
|
|
|
|
r1.mmu_done <= '1';
|
|
|
|
r1.mmu_done <= '1';
|
|
|
|
end if;
|
|
|
|
end if;
|
|
|
|
r1.forward_sel <= (others => '1');
|
|
|
|
|
|
|
|
r1.use_forward1 <= '1';
|
|
|
|
|
|
|
|
r1.wb.cyc <= '0';
|
|
|
|
r1.wb.cyc <= '0';
|
|
|
|
r1.wb.stb <= '0';
|
|
|
|
r1.wb.stb <= '0';
|
|
|
|
end if;
|
|
|
|
end if;
|
|
|
|