dcache: Implement the dcbz instruction

This adds logic to dcache and loadstore1 to implement dcbz.  For now
dcbz zeroes a single cache line (64 bytes by default), rather than the
128 bytes that IBM Power processors zero.

The dcbz operation is performed much like a load miss, except that
we write zeroes to memory instead of reading from it.  As each wishbone
ack comes back, we write zeroes to the cache BRAM instead of the data
returned from memory.  In this way we zero the line in memory and also
zero the line of cache memory, establishing the line in the cache if it
wasn't already resident.  If it was already resident, we overwrite the
existing line in the cache.

Signed-off-by: Paul Mackerras <paulus@ozlabs.org>
pull/166/head
Paul Mackerras 5 years ago
parent 167e37d667
commit 041d6bef60

@ -236,6 +236,7 @@ package common is
type Loadstore1ToDcacheType is record type Loadstore1ToDcacheType is record
valid : std_ulogic; valid : std_ulogic;
load : std_ulogic; load : std_ulogic;
dcbz : std_ulogic;
nc : std_ulogic; nc : std_ulogic;
reserve : std_ulogic; reserve : std_ulogic;
addr : std_ulogic_vector(63 downto 0); addr : std_ulogic_vector(63 downto 0);

@ -581,8 +581,12 @@ begin
wr_data <= r0.data; wr_data <= r0.data;
wr_sel <= r0.byte_sel; wr_sel <= r0.byte_sel;
else else
-- Otherwise, we might be doing a reload -- Otherwise, we might be doing a reload or a DCBZ
wr_data <= wishbone_in.dat; if r1.req.dcbz = '1' then
wr_data <= (others => '0');
else
wr_data <= wishbone_in.dat;
end if;
wr_sel <= (others => '1'); wr_sel <= (others => '1');
wr_addr <= std_ulogic_vector(to_unsigned(r1.store_row, ROW_BITS)); wr_addr <= std_ulogic_vector(to_unsigned(r1.store_row, ROW_BITS));
end if; end if;
@ -718,18 +722,54 @@ begin
r1.wb.we <= '0'; r1.wb.we <= '0';
r1.state <= NC_LOAD_WAIT_ACK; r1.state <= NC_LOAD_WAIT_ACK;


when OP_STORE_HIT | OP_STORE_MISS => when OP_STORE_HIT | OP_STORE_MISS =>
r1.wb.sel <= r0.byte_sel; if r0.dcbz = '0' then
r1.wb.adr <= r0.addr(r1.wb.adr'left downto 3) & "000"; r1.wb.sel <= r0.byte_sel;
r1.wb.dat <= r0.data; r1.wb.adr <= r0.addr(r1.wb.adr'left downto 3) & "000";
if cancel_store = '0' then r1.wb.dat <= r0.data;
if cancel_store = '0' then
r1.wb.cyc <= '1';
r1.wb.stb <= '1';
r1.wb.we <= '1';
r1.state <= STORE_WAIT_ACK;
else
r1.stcx_fail <= '1';
r1.state <= IDLE;
end if;
else
-- dcbz is handled much like a load miss except
-- that we are writing to memory instead of reading
r1.store_index <= req_index;
r1.store_row <= get_row(req_laddr);

if req_op = OP_STORE_HIT then
r1.store_way <= req_hit_way;
else
r1.store_way <= replace_way;

-- Force misses on the victim way while zeroing
cache_valids(req_index)(replace_way) <= '0';

-- Store new tag in selected way
for i in 0 to NUM_WAYS-1 loop
if i = replace_way then
tagset := cache_tags(req_index);
write_tag(i, tagset, req_tag);
cache_tags(req_index) <= tagset;
end if;
end loop;
end if;

-- Set up for wishbone writes
r1.wb.adr <= req_laddr(r1.wb.adr'left downto 0);
r1.wb.sel <= (others => '1');
r1.wb.we <= '1';
r1.wb.dat <= (others => '0');
r1.wb.cyc <= '1'; r1.wb.cyc <= '1';
r1.wb.stb <= '1'; r1.wb.stb <= '1';
r1.wb.we <= '1';
r1.state <= STORE_WAIT_ACK; -- Handle the rest like a load miss
else r1.state <= RELOAD_WAIT_ACK;
r1.stcx_fail <= '1';
r1.state <= IDLE;
end if; end if;


-- OP_NONE and OP_BAD do nothing -- OP_NONE and OP_BAD do nothing
@ -766,7 +806,7 @@ begin
-- not idle, which we don't currently know how to deal -- not idle, which we don't currently know how to deal
-- with. -- with.
-- --
if r1.store_row = get_row(r1.req.addr) then if r1.store_row = get_row(r1.req.addr) and r1.req.dcbz = '0' then
r1.slow_data <= wishbone_in.dat; r1.slow_data <= wishbone_in.dat;
end if; end if;



@ -164,7 +164,7 @@ architecture behaviour of decode1 is
2#0000110110# => (ALU, OP_NOP, NONE, NONE, NONE, NONE, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', NONE, '0', '1'), -- dcbst 2#0000110110# => (ALU, OP_NOP, NONE, NONE, NONE, NONE, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', NONE, '0', '1'), -- dcbst
2#0100010110# => (ALU, OP_NOP, NONE, NONE, NONE, NONE, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', NONE, '0', '1'), -- dcbt 2#0100010110# => (ALU, OP_NOP, NONE, NONE, NONE, NONE, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', NONE, '0', '1'), -- dcbt
2#0011110110# => (ALU, OP_NOP, NONE, NONE, NONE, NONE, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', NONE, '0', '1'), -- dcbtst 2#0011110110# => (ALU, OP_NOP, NONE, NONE, NONE, NONE, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', NONE, '0', '1'), -- dcbtst
-- 2#1111110110# dcbz 2#1111110110# => (LDST, OP_DCBZ, RA_OR_ZERO, RB, NONE, NONE, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', NONE, '0', '0'), -- dcbz
2#0110001001# => (ALU, OP_DIVE, RA, RB, NONE, RT, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', RC, '0', '0'), -- divdeu 2#0110001001# => (ALU, OP_DIVE, RA, RB, NONE, RT, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', RC, '0', '0'), -- divdeu
2#1110001001# => (ALU, OP_DIVE, RA, RB, NONE, RT, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', RC, '0', '0'), -- divdeuo 2#1110001001# => (ALU, OP_DIVE, RA, RB, NONE, RT, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', RC, '0', '0'), -- divdeuo
2#0110001011# => (ALU, OP_DIVE, RA, RB, NONE, RT, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '1', '0', RC, '0', '0'), -- divweu 2#0110001011# => (ALU, OP_DIVE, RA, RB, NONE, RT, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '1', '0', RC, '0', '0'), -- divweu

@ -43,6 +43,7 @@ architecture behave of loadstore1 is
type reg_stage_t is record type reg_stage_t is record
-- latch most of the input request -- latch most of the input request
load : std_ulogic; load : std_ulogic;
dcbz : std_ulogic;
addr : std_ulogic_vector(63 downto 0); addr : std_ulogic_vector(63 downto 0);
store_data : std_ulogic_vector(63 downto 0); store_data : std_ulogic_vector(63 downto 0);
load_data : std_ulogic_vector(63 downto 0); load_data : std_ulogic_vector(63 downto 0);
@ -198,8 +199,11 @@ begin
when IDLE => when IDLE =>
if l_in.valid = '1' then if l_in.valid = '1' then
v.load := '0'; v.load := '0';
v.dcbz := '0';
if l_in.op = OP_LOAD then if l_in.op = OP_LOAD then
v.load := '1'; v.load := '1';
elsif l_in.op = OP_DCBZ then
v.dcbz := '1';
end if; end if;
v.addr := lsu_sum; v.addr := lsu_sum;
v.write_reg := l_in.write_reg; v.write_reg := l_in.write_reg;
@ -293,6 +297,7 @@ begin
-- Update outputs to dcache -- Update outputs to dcache
d_out.valid <= req; d_out.valid <= req;
d_out.load <= v.load; d_out.load <= v.load;
d_out.dcbz <= v.dcbz;
d_out.nc <= v.nc; d_out.nc <= v.nc;
d_out.reserve <= v.reserve; d_out.reserve <= v.reserve;
d_out.addr <= addr; d_out.addr <= addr;

Loading…
Cancel
Save