diff --git a/common.vhdl b/common.vhdl index 65e40c1..61252bd 100644 --- a/common.vhdl +++ b/common.vhdl @@ -236,6 +236,7 @@ package common is type Loadstore1ToDcacheType is record valid : std_ulogic; load : std_ulogic; + dcbz : std_ulogic; nc : std_ulogic; reserve : std_ulogic; addr : std_ulogic_vector(63 downto 0); diff --git a/dcache.vhdl b/dcache.vhdl index 7e553bf..550298b 100644 --- a/dcache.vhdl +++ b/dcache.vhdl @@ -581,8 +581,12 @@ begin wr_data <= r0.data; wr_sel <= r0.byte_sel; else - -- Otherwise, we might be doing a reload - wr_data <= wishbone_in.dat; + -- Otherwise, we might be doing a reload or a DCBZ + if r1.req.dcbz = '1' then + wr_data <= (others => '0'); + else + wr_data <= wishbone_in.dat; + end if; wr_sel <= (others => '1'); wr_addr <= std_ulogic_vector(to_unsigned(r1.store_row, ROW_BITS)); end if; @@ -718,18 +722,54 @@ begin r1.wb.we <= '0'; r1.state <= NC_LOAD_WAIT_ACK; - when OP_STORE_HIT | OP_STORE_MISS => - r1.wb.sel <= r0.byte_sel; - r1.wb.adr <= r0.addr(r1.wb.adr'left downto 3) & "000"; - r1.wb.dat <= r0.data; - if cancel_store = '0' then + when OP_STORE_HIT | OP_STORE_MISS => + if r0.dcbz = '0' then + r1.wb.sel <= r0.byte_sel; + r1.wb.adr <= r0.addr(r1.wb.adr'left downto 3) & "000"; + r1.wb.dat <= r0.data; + if cancel_store = '0' then + r1.wb.cyc <= '1'; + r1.wb.stb <= '1'; + r1.wb.we <= '1'; + r1.state <= STORE_WAIT_ACK; + else + r1.stcx_fail <= '1'; + r1.state <= IDLE; + end if; + else + -- dcbz is handled much like a load miss except + -- that we are writing to memory instead of reading + r1.store_index <= req_index; + r1.store_row <= get_row(req_laddr); + + if req_op = OP_STORE_HIT then + r1.store_way <= req_hit_way; + else + r1.store_way <= replace_way; + + -- Force misses on the victim way while zeroing + cache_valids(req_index)(replace_way) <= '0'; + + -- Store new tag in selected way + for i in 0 to NUM_WAYS-1 loop + if i = replace_way then + tagset := cache_tags(req_index); + write_tag(i, tagset, req_tag); + cache_tags(req_index) <= tagset; + end if; + end loop; + end if; + + -- Set up for wishbone writes + r1.wb.adr <= req_laddr(r1.wb.adr'left downto 0); + r1.wb.sel <= (others => '1'); + r1.wb.we <= '1'; + r1.wb.dat <= (others => '0'); r1.wb.cyc <= '1'; r1.wb.stb <= '1'; - r1.wb.we <= '1'; - r1.state <= STORE_WAIT_ACK; - else - r1.stcx_fail <= '1'; - r1.state <= IDLE; + + -- Handle the rest like a load miss + r1.state <= RELOAD_WAIT_ACK; end if; -- OP_NONE and OP_BAD do nothing @@ -766,7 +806,7 @@ begin -- not idle, which we don't currently know how to deal -- with. -- - if r1.store_row = get_row(r1.req.addr) then + if r1.store_row = get_row(r1.req.addr) and r1.req.dcbz = '0' then r1.slow_data <= wishbone_in.dat; end if; diff --git a/decode1.vhdl b/decode1.vhdl index 8c7d5f2..785b669 100644 --- a/decode1.vhdl +++ b/decode1.vhdl @@ -164,7 +164,7 @@ architecture behaviour of decode1 is 2#0000110110# => (ALU, OP_NOP, NONE, NONE, NONE, NONE, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', NONE, '0', '1'), -- dcbst 2#0100010110# => (ALU, OP_NOP, NONE, NONE, NONE, NONE, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', NONE, '0', '1'), -- dcbt 2#0011110110# => (ALU, OP_NOP, NONE, NONE, NONE, NONE, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', NONE, '0', '1'), -- dcbtst - -- 2#1111110110# dcbz + 2#1111110110# => (LDST, OP_DCBZ, RA_OR_ZERO, RB, NONE, NONE, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', NONE, '0', '0'), -- dcbz 2#0110001001# => (ALU, OP_DIVE, RA, RB, NONE, RT, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', RC, '0', '0'), -- divdeu 2#1110001001# => (ALU, OP_DIVE, RA, RB, NONE, RT, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', RC, '0', '0'), -- divdeuo 2#0110001011# => (ALU, OP_DIVE, RA, RB, NONE, RT, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '1', '0', RC, '0', '0'), -- divweu diff --git a/loadstore1.vhdl b/loadstore1.vhdl index 664e396..90650db 100644 --- a/loadstore1.vhdl +++ b/loadstore1.vhdl @@ -43,6 +43,7 @@ architecture behave of loadstore1 is type reg_stage_t is record -- latch most of the input request load : std_ulogic; + dcbz : std_ulogic; addr : std_ulogic_vector(63 downto 0); store_data : std_ulogic_vector(63 downto 0); load_data : std_ulogic_vector(63 downto 0); @@ -198,8 +199,11 @@ begin when IDLE => if l_in.valid = '1' then v.load := '0'; + v.dcbz := '0'; if l_in.op = OP_LOAD then v.load := '1'; + elsif l_in.op = OP_DCBZ then + v.dcbz := '1'; end if; v.addr := lsu_sum; v.write_reg := l_in.write_reg; @@ -293,6 +297,7 @@ begin -- Update outputs to dcache d_out.valid <= req; d_out.load <= v.load; + d_out.dcbz <= v.dcbz; d_out.nc <= v.nc; d_out.reserve <= v.reserve; d_out.addr <= addr;