@@ -264,6 +264,23 @@ architecture rtl of dcache is
     -- subsequent load requests to the same line can be completed as
     -- soon as the necessary data comes in from memory, without
     -- waiting for the whole line to be read.
+    --
+    -- Aligned loads and stores of a doubleword or less are atomic
+    -- because they are done in a single wishbone operation.
+    -- For quadword atomic loads and stores we rely on the wishbone
+    -- arbiter not interrupting access to a target once it has first
+    -- given access; i.e. once we have the main wishbone, no other
+    -- master gets access until we drop cyc.
+    --
+    -- Note on loads potentially hitting the victim line that is
+    -- currently being replaced: the new tag is available starting
+    -- with the 3rd cycle of RELOAD_WAIT_ACK state. As long as the
+    -- first read on the wishbone takes at least one cycle (i.e. the
+    -- ack doesn't arrive in the same cycle as stb was asserted),
+    -- r1.full will be true at least until that 3rd cycle and so a load
+    -- following a load miss can't hit on the old tag of the victim
+    -- line. As long as ack is not generated combinationally from
+    -- stb, this will be fine.

     -- Stage 0 register, basically contains just the latched request
     type reg_stage_0_t is record
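The quadword-atomicity argument in the new comment rests entirely on the bus arbiter holding its grant for as long as cyc stays asserted. A minimal sketch of that property follows; the entity, generic and signal names here are illustrative only (this is not microwatt's actual wishbone_arbiter code), but it shows the behaviour the dcache relies on: re-arbitration can only happen after the granted master drops cyc, so a two-dword access issued under one cyc is never interleaved with another master's cycle.

    library ieee;
    use ieee.std_logic_1164.all;

    -- Illustrative arbiter sketch: GRANT is frozen while the granted
    -- master keeps cyc high.
    entity arb_sketch is
        generic (N : positive := 3);
        port (
            clk    : in  std_ulogic;
            rst    : in  std_ulogic;
            cyc_in : in  std_ulogic_vector(N - 1 downto 0);
            grant  : out natural range 0 to N - 1
        );
    end entity arb_sketch;

    architecture rtl of arb_sketch is
        signal busy    : std_ulogic := '0';
        signal grant_r : natural range 0 to N - 1 := 0;
    begin
        grant <= grant_r;
        process(clk)
        begin
            if rising_edge(clk) then
                if rst = '1' then
                    busy <= '0';
                elsif busy = '0' then
                    -- Bus is free: grant the lowest-numbered requester.
                    for i in 0 to N - 1 loop
                        if cyc_in(i) = '1' then
                            grant_r <= i;
                            busy    <= '1';
                            exit;
                        end if;
                    end loop;
                elsif cyc_in(grant_r) = '0' then
                    -- Owner dropped cyc: only now may ownership change.
                    busy <= '0';
                end if;
            end if;
        end process;
    end architecture rtl;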
@@ -307,12 +324,16 @@ architecture rtl of dcache is
         full             : std_ulogic;      -- have uncompleted request
         mmu_req          : std_ulogic;      -- request is from MMU
         req              : mem_access_request_t;
+        atomic_more      : std_ulogic;      -- atomic request isn't finished

         -- Cache hit state
         hit_way          : way_t;
         hit_load_valid   : std_ulogic;
         hit_index        : index_t;
         cache_hit        : std_ulogic;
+        prev_hit         : std_ulogic;
+        prev_way         : way_t;
+        prev_hit_reload  : std_ulogic;

         -- TLB hit state
         tlb_hit          : std_ulogic;
@@ -389,6 +410,7 @@ architecture rtl of dcache is
     signal req_same_tag : std_ulogic;
     signal req_go : std_ulogic;
     signal req_nc : std_ulogic;
+    signal req_hit_reload : std_ulogic;
     signal early_req_row : row_t;
     signal early_rd_valid : std_ulogic;
@@ -927,6 +949,7 @@ begin
         variable fwd_match : std_ulogic;
         variable snp_matches : std_ulogic_vector(TLB_NUM_WAYS - 1 downto 0);
         variable snoop_match : std_ulogic;
+        variable hit_reload : std_ulogic;
     begin
         -- Extract line, row and tag from request
         rindex := get_index(r0.req.addr);
@@ -1071,6 +1094,7 @@ begin
             assert not is_X(rindex);
             assert not is_X(r1.store_index);
         end if;
+        hit_reload := '0';
         if r1.state = RELOAD_WAIT_ACK and rel_match = '1' and
             rindex = r1.store_index then
             -- Ignore is_hit from above, because a load miss writes the new tag
@@ -1085,11 +1109,23 @@ begin
                       r1.rows_valid(to_integer(req_row(ROW_LINEBITS-1 downto 0))) or
                       use_forward_rl;
             hit_way := replace_way;
+            hit_reload := is_hit;
+        elsif r0.req.load = '1' and r0.req.atomic_qw = '1' and r0.req.atomic_first = '0' and
+            r0.req.nc = '0' and perm_attr.nocache = '0' and r1.prev_hit = '1' then
+            -- For the second half of an atomic quadword load, just use the
+            -- same way as the first half, without considering whether the line
+            -- is valid; it is as if we had read the second dword at the same
+            -- time as the first dword, and the line was valid back then.
+            -- (Cases where the line is currently being reloaded are handled above.)
+            -- NB lq to noncacheable isn't required to be atomic per the ISA.
+            is_hit := '1';
+            hit_way := r1.prev_way;
         end if;

         -- The way that matched on a hit
         req_hit_way <= hit_way;
         req_is_hit <= is_hit;
+        req_hit_reload <= hit_reload;

         -- work out whether we have permission for this access
         -- NB we don't yet implement AMR, thus no KUAP
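To make the prev_hit/prev_way handshake concrete, here is an illustrative two-cycle trace of an lq whose first half hits; the way number is made up:

    cycle 1: first dword (atomic_first = '1') hits way 2
             -> the req_go latch added later in this patch records
                r1.prev_hit <= '1', r1.prev_way <= 2
    cycle 2: second dword (atomic_qw = '1', atomic_first = '0')
             -> the elsif above forces is_hit := '1' and takes
                hit_way from r1.prev_way, without consulting the
                valid bits

The case where the line is mid-reload is not lost by this shortcut: it is caught by the RELOAD_WAIT_ACK arm before the elsif, and by prev_hit_reload at reload completion.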
@@ -1418,6 +1454,8 @@ begin
                 r1.acks_pending <= to_unsigned(0, 3);
                 r1.stalled <= '0';
                 r1.dec_acks <= '0';
+                r1.prev_hit <= '0';
+                r1.prev_hit_reload <= '0';

                 reservation.valid <= '0';
                 reservation.addr <= (others => '0');
@@ -1443,9 +1481,7 @@ begin
                 if req_go = '1' and access_ok = '1' and r0.req.load = '1' and
                     r0.req.reserve = '1' and r0.req.atomic_first = '1' then
                     reservation.addr <= ra(REAL_ADDR_BITS - 1 downto LINE_OFF_BITS);
-                    if req_is_hit = '1' then
-                        reservation.valid <= not req_snoop_hit;
-                    end if;
+                    reservation.valid <= req_is_hit and not req_snoop_hit;
                 end if;

                 -- Do invalidations from snooped stores to memory
@@ -1488,8 +1524,8 @@ begin
             req.flush := r0.req.flush;
             req.touch := r0.req.touch;
             req.reserve := r0.req.reserve;
-            req.first_dw := r0.req.atomic_first;
-            req.last_dw := r0.req.atomic_last;
+            req.first_dw := not r0.req.atomic_qw or r0.req.atomic_first;
+            req.last_dw := not r0.req.atomic_qw or r0.req.atomic_last;
             req.real_addr := ra;
             -- Force data to 0 for dcbz
             if r0.req.dcbz = '1' then
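Written out as a table, the reworked first_dw/last_dw terms come to this (derived directly from the two assignments above):

    atomic_qw | atomic_first | atomic_last || first_dw | last_dw
        0     |      -       |      -      ||    1     |    1      ordinary access
        1     |     '1'      |     '0'     ||    1     |    0      first half of lq/stq
        1     |     '0'      |     '1'     ||    0     |    1      second half of lq/stq

So every non-quadword access counts as both the first and the last dword of its own one-element sequence, which is what lets the unconditional r1.atomic_more <= not req.last_dw assignments later in the patch work uniformly for all access types.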
@@ -1528,6 +1564,11 @@ begin
                 if req_op_load_miss = '1' or (r0.req.dcbz = '1' and req_is_hit = '0') then
                     r1.choose_victim <= '1';
                 end if;
+                if req_go = '1' then
+                    r1.prev_hit <= req_is_hit;
+                    r1.prev_way <= req_hit_way;
+                    r1.prev_hit_reload <= req_hit_reload;
+                end if;

                 -- Update count of pending acks
                 acks := r1.acks_pending;
@@ -1549,6 +1590,7 @@ begin
                         r1.wb.sel <= req.byte_sel;
                         r1.wb.dat <= req.data;
                         r1.dcbz <= req.dcbz;
+                        r1.atomic_more <= not req.last_dw;

                         -- Keep track of our index and way for subsequent stores.
                         r1.store_index <= get_index(req.real_addr);
@@ -1659,7 +1701,7 @@ begin
                         assert not is_X(r1.req.real_addr);
                     end if;
                     if r1.full = '1' and r1.req.same_tag = '1' and
-                        ((r1.dcbz = '1' and req.dcbz = '1') or r1.req.op_lmiss = '1') and
+                        ((r1.dcbz = '1' and r1.req.dcbz = '1') or r1.req.op_lmiss = '1') and
                         r1.store_row = get_row(r1.req.real_addr) then
                         r1.full <= '0';
                         r1.slow_valid <= '1';
@@ -1668,12 +1710,9 @@ begin
                         else
                             r1.mmu_done <= '1';
                         end if;

-                        -- NB: for lqarx, set the reservation on the first
-                        -- dword so that a snooped store between the two
-                        -- dwords will kill the reservation.
-                        if req.reserve = '1' and req.first_dw = '1' then
+                        -- NB: for lqarx, set the reservation on the first dword
+                        if r1.req.reserve = '1' and r1.req.first_dw = '1' then
                             reservation.valid <= '1';
-                            reservation.addr <= req.real_addr(REAL_ADDR_BITS - 1 downto LINE_OFF_BITS);
                         end if;
                     end if;
@@ -1690,6 +1729,10 @@ begin
                         cache_valids(to_integer(r1.store_index))(to_integer(r1.store_way)) <= '1';
                         ev.dcache_refill <= not r1.dcbz;
+                        -- Second half of a lq/lqarx can assume a hit on this line now
+                        -- if the first half hit this line.
+                        r1.prev_hit <= r1.prev_hit_reload;
+                        r1.prev_way <= r1.store_way;
                         r1.state <= IDLE;
                     end if;
@@ -1703,6 +1746,10 @@ begin
                     if wishbone_in.stall = '0' then
                         -- See if there is another store waiting to be done
                         -- which is in the same real page.
+                        -- This could be either in r1.req or in r0.
+                        -- Ignore store-conditionals, they have to go through
+                        -- DO_STCX state, unless they are the second half of a
+                        -- successful stqcx, which is handled here.
                         if req.valid = '1' then
                             r1.wb.adr(SET_SIZE_BITS - ROW_OFF_BITS - 1 downto 0) <=
                                 req.real_addr(SET_SIZE_BITS - 1 downto ROW_OFF_BITS);
@@ -1710,28 +1757,33 @@ begin
                             r1.wb.sel <= req.byte_sel;
                         end if;
                         assert not is_X(acks);
-                        if acks < 7 and req.same_tag = '1' and req.dcbz = '0' and
-                            req.op_store = '1' then
-                            r1.wb.stb <= '1';
-                            r1.store_way <= req.hit_way;
-                            r1.store_row <= get_row(req.real_addr);
-                            r1.write_bram <= req.is_hit;
-                            r1.full <= '0';
-                            r1.slow_valid <= '1';
-                            -- Store requests never come from the MMU
-                            r1.ls_valid <= '1';
-                            stbs_done := false;
-                        else
-                            r1.wb.stb <= '0';
-                            stbs_done := true;
+                        r1.wb.stb <= '0';
+                        if req.op_store = '1' and req.same_tag = '1' and req.dcbz = '0' and
+                            (req.reserve = '0' or r1.atomic_more = '1') then
+                            if acks < 7 then
+                                r1.wb.stb <= '1';
+                                stbs_done := false;
+                                r1.store_way <= req.hit_way;
+                                r1.store_row <= get_row(req.real_addr);
+                                r1.write_bram <= req.is_hit;
+                                r1.atomic_more <= not req.last_dw;
+                                r1.full <= '0';
+                                r1.slow_valid <= '1';
+                                -- Store requests never come from the MMU
+                                r1.ls_valid <= '1';
+                            end if;
+                        else
+                            stbs_done := true;
+                            if req.valid = '1' then
+                                r1.atomic_more <= '0';
+                            end if;
                         end if;
                     end if;

                     -- Got ack ? See if complete.
-                    if wishbone_in.ack = '1' then
+                    if stbs_done and r1.atomic_more = '0' then
                         assert not is_X(acks);
-                        if stbs_done and acks = 1 then
+                        if acks = 0 or (wishbone_in.ack = '1' and acks = 1) then
                             r1.state <= IDLE;
                             r1.wb.cyc <= '0';
                             r1.wb.stb <= '0';
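The new exit condition is worth unpacking: the FSM now leaves STORE_WAIT_ACK only when all strobes have been issued (stbs_done), the atomic sequence is complete (r1.atomic_more = '0'), and either nothing is left outstanding (acks = 0) or the final ack is arriving in this cycle (wishbone_in.ack = '1' and acks = 1). An illustrative stqcx. timeline, assuming a slave that acks each beat two cycles after accepting it (timing assumed, not taken from simulation):

    cycle | event                                | acks after | exit?
      1   | first dword stb accepted             |     1      | no: atomic_more = '1'
      2   | second dword stb accepted (last_dw)  |     2      | no: two acks pending
      3   | ack for first dword                  |     1      | no: ack, but acks was 2
      4   | ack for second dword (acks was 1)    |     0      | yes: -> IDLE

The acks = 0 disjunct covers the case where the last ack has already drained by the time stbs_done and atomic_more permit an exit; the old code never needed it because its exit test was only evaluated under wishbone_in.ack = '1'.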
@@ -1770,31 +1822,30 @@ begin
                         r1.wb.cyc <= '0';
                         r1.wb.stb <= '0';
                         reservation.valid <= '0';
+                        -- If this is the first half of a stqcx., the second half
+                        -- will fail also because the reservation is not valid.
+                        r1.state <= IDLE;
                     elsif r1.wb.cyc = '0' then
                         -- Right address and have reservation, so start the
                         -- wishbone cycle
                         r1.wb.we <= '1';
                         r1.wb.cyc <= '1';
                         r1.wb.stb <= '1';
-                    else
-                        if wishbone_in.stall = '0' then
-                            -- Store has been accepted, so now we can write the
-                            -- cache data RAM
-                            r1.write_bram <= req.is_hit;
-                            r1.wb.stb <= '0';
-                        end if;
-                        if wishbone_in.ack = '1' then
-                            r1.state <= IDLE;
-                            r1.wb.cyc <= '0';
-                            r1.wb.stb <= '0';
-                            r1.full <= '0';
-                            r1.slow_valid <= '1';
-                            r1.ls_valid <= '1';
-                            reservation.valid <= '0';
-                        end if;
+                    elsif r1.wb.stb = '1' and wishbone_in.stall = '0' then
+                        -- Store has been accepted, so now we can write the
+                        -- cache data RAM and complete the request
+                        r1.write_bram <= r1.req.is_hit;
+                        r1.wb.stb <= '0';
+                        r1.full <= '0';
+                        r1.slow_valid <= '1';
+                        r1.ls_valid <= '1';
+                        -- For stqcx., kill the reservation on the last dword
+                        if r1.req.last_dw = '1' then
+                            reservation.valid <= '0';
+                        end if;
+                        -- For a stqcx, STORE_WAIT_ACK will issue the second half
+                        -- without checking the reservation, which is what we want
+                        -- given that the first half has gone out.
+                        -- With r1.atomic_more set, STORE_WAIT_ACK won't exit to
+                        -- IDLE state until it sees the second half.
+                        r1.state <= STORE_WAIT_ACK;
                     end if;

                 when FLUSH_CYCLE =>
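Pulling the DO_STCX and STORE_WAIT_ACK changes together, the intended stqcx. flow can be summarised as follows (a reading of the comments above, not code from the patch):

    first dword:   IDLE -> DO_STCX
                   reservation checked; wishbone cycle started;
                   write_bram/full/slow_valid handled on acceptance;
                   r1.atomic_more stays set because last_dw = '0'
                   -> STORE_WAIT_ACK
    second dword:  STORE_WAIT_ACK issues it like an ordinary store,
                   deliberately without re-checking the reservation,
                   since the first half has already gone out on the
                   bus; last_dw = '1' clears r1.atomic_more, which
                   finally lets the exit test return the FSM to IDLE

If the reservation is already gone when the first half reaches DO_STCX, the failure path clears reservation.valid before returning to IDLE, so the second half is guaranteed to fail in the same way.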