dcache: Snoop writes to memory by other agents

This adds a path where the wishbone that goes out to memory and I/O
also gets fed back to the dcache, which looks for writes that it
didn't initiate, and invalidates any cache line that gets written to.

This involves a second read port on the cache tag RAM for looking up
the snooped writes, and effectively a second write port on the cache
valid bit array to clear bits corresponding to snoop hits.

Signed-off-by: Paul Mackerras <paulus@ozlabs.org>
pull/287/head
Paul Mackerras 4 years ago
parent 4a8ab3331c
commit eb7eba2d92

@ -37,6 +37,8 @@ entity core is
wishbone_data_in : in wishbone_slave_out; wishbone_data_in : in wishbone_slave_out;
wishbone_data_out : out wishbone_master_out; wishbone_data_out : out wishbone_master_out;


wb_snoop_in : in wishbone_master_out;

dmi_addr : in std_ulogic_vector(3 downto 0); dmi_addr : in std_ulogic_vector(3 downto 0);
dmi_din : in std_ulogic_vector(63 downto 0); dmi_din : in std_ulogic_vector(63 downto 0);
dmi_dout : out std_ulogic_vector(63 downto 0); dmi_dout : out std_ulogic_vector(63 downto 0);
@ -423,6 +425,7 @@ begin
stall_out => dcache_stall_out, stall_out => dcache_stall_out,
wishbone_in => wishbone_data_in, wishbone_in => wishbone_data_in,
wishbone_out => wishbone_data_out, wishbone_out => wishbone_data_out,
snoop_in => wb_snoop_in,
log_out => log_data(170 downto 151) log_out => log_data(170 downto 151)
); );



@ -39,6 +39,8 @@ entity dcache is
m_in : in MmuToDcacheType; m_in : in MmuToDcacheType;
m_out : out DcacheToMmuType; m_out : out DcacheToMmuType;


snoop_in : in wishbone_master_out := wishbone_master_out_init;

stall_out : out std_ulogic; stall_out : out std_ulogic;


wishbone_out : out wishbone_master_out; wishbone_out : out wishbone_master_out;
@ -415,6 +417,11 @@ architecture rtl of dcache is
type tlb_plru_out_t is array(tlb_index_t) of std_ulogic_vector(TLB_WAY_BITS-1 downto 0); type tlb_plru_out_t is array(tlb_index_t) of std_ulogic_vector(TLB_WAY_BITS-1 downto 0);
signal tlb_plru_victim : tlb_plru_out_t; signal tlb_plru_victim : tlb_plru_out_t;


-- Snoop state registered by the cache_tag_read_2 process.
-- Tag set read from cache_tags at the index of the snooped address
signal snoop_tag_set : cache_tags_set_t;
-- '1' when a snooped write (cyc, stb and we all set) was captured
signal snoop_valid : std_ulogic;
-- Tag portion of the snooped address
signal snoop_wrtag : cache_tag_t;
-- Cache line index of the snooped address
signal snoop_index : index_t;

-- --
-- Helper functions to decode incoming requests -- Helper functions to decode incoming requests
-- --
@ -528,7 +535,8 @@ begin
assert LINE_SIZE mod ROW_SIZE = 0 report "LINE_SIZE not multiple of ROW_SIZE" severity FAILURE; assert LINE_SIZE mod ROW_SIZE = 0 report "LINE_SIZE not multiple of ROW_SIZE" severity FAILURE;
assert ispow2(LINE_SIZE) report "LINE_SIZE not power of 2" severity FAILURE; assert ispow2(LINE_SIZE) report "LINE_SIZE not power of 2" severity FAILURE;
assert ispow2(NUM_LINES) report "NUM_LINES not power of 2" severity FAILURE; assert ispow2(NUM_LINES) report "NUM_LINES not power of 2" severity FAILURE;
assert ispow2(ROW_PER_LINE) report "ROW_PER_LINE not power of 2" severity FAILURE; assert ispow2(ROW_PER_LINE) and ROW_PER_LINE > 1
report "ROW_PER_LINE not power of 2 greater than 1" severity FAILURE;
assert (ROW_BITS = INDEX_BITS + ROW_LINEBITS) assert (ROW_BITS = INDEX_BITS + ROW_LINEBITS)
report "geometry bits don't add up" severity FAILURE; report "geometry bits don't add up" severity FAILURE;
assert (LINE_OFF_BITS = ROW_OFF_BITS + ROW_LINEBITS) assert (LINE_OFF_BITS = ROW_OFF_BITS + ROW_LINEBITS)
@ -783,6 +791,24 @@ begin
end if; end if;
end process; end process;


-- Second read port on the cache tag RAM, dedicated to snooping.
-- On every rising clock edge this registers the tag set, tag and index
-- that correspond to the address currently on the snoop bus, together
-- with a flag saying whether that bus cycle is a write to be acted on.
cache_tag_read_2 : process(clk)
    variable snoop_addr : std_ulogic_vector(REAL_ADDR_BITS - 1 downto 0);
    variable snoop_idx  : index_t;
begin
    if rising_edge(clk) then
        -- Place the snooped address in the low bits; all other bits are zero
        snoop_addr := (others => '0');
        snoop_addr(snoop_in.adr'left downto 0) := snoop_in.adr;
        snoop_idx := get_index(snoop_addr);

        snoop_index   <= snoop_idx;
        snoop_wrtag   <= get_tag(snoop_addr);
        snoop_tag_set <= cache_tags(snoop_idx);

        if r1.wb.cyc = '1' and wishbone_in.stall = '0' then
            -- Our own wishbone cycle is active and not stalled:
            -- don't snoop our own cycles
            snoop_valid <= '0';
        else
            -- Flag a write strobe on the snoop bus
            snoop_valid <= snoop_in.cyc and snoop_in.stb and snoop_in.we;
        end if;
    end if;
end process;

-- Cache request parsing and hit detection -- Cache request parsing and hit detection
dcache_request : process(all) dcache_request : process(all)
variable is_hit : std_ulogic; variable is_hit : std_ulogic;
@ -1293,6 +1319,13 @@ begin
end if; end if;
end if; end if;


-- Do invalidations from snooped stores to memory.
-- For each way: when a snooped write has been registered (snoop_valid)
-- and the tag held for that way in snoop_tag_set equals the tag of the
-- snooped address, clear that way's valid bit at the snooped index.
for i in way_t loop
if snoop_valid = '1' and read_tag(i, snoop_tag_set) = snoop_wrtag then
cache_valids(snoop_index)(i) <= '0';
end if;
end loop;

if r1.write_tag = '1' then if r1.write_tag = '1' then
-- Store new tag in selected way -- Store new tag in selected way
for i in 0 to NUM_WAYS-1 loop for i in 0 to NUM_WAYS-1 loop

@ -133,6 +133,7 @@ architecture behaviour of soc is
-- Wishbone master (output of arbiter): -- Wishbone master (output of arbiter):
signal wb_master_in : wishbone_slave_out; signal wb_master_in : wishbone_slave_out;
signal wb_master_out : wishbone_master_out; signal wb_master_out : wishbone_master_out;
signal wb_snoop : wishbone_master_out;


-- Main "IO" bus, from main slave decoder to the latch -- Main "IO" bus, from main slave decoder to the latch
signal wb_io_in : wishbone_master_out; signal wb_io_in : wishbone_master_out;
@ -284,6 +285,7 @@ begin
wishbone_insn_out => wishbone_icore_out, wishbone_insn_out => wishbone_icore_out,
wishbone_data_in => wishbone_dcore_in, wishbone_data_in => wishbone_dcore_in,
wishbone_data_out => wishbone_dcore_out, wishbone_data_out => wishbone_dcore_out,
wb_snoop_in => wb_snoop,
dmi_addr => dmi_addr(3 downto 0), dmi_addr => dmi_addr(3 downto 0),
dmi_dout => dmi_core_dout, dmi_dout => dmi_core_dout,
dmi_din => dmi_dout, dmi_din => dmi_dout,
@ -313,6 +315,18 @@ begin
wb_slave_in => wb_master_in wb_slave_in => wb_master_in
); );


-- Snoop bus going to caches.
-- This is a copy of the arbiter's master output in which stb is forced
-- low whenever the bus is stalled, so the caches never see stalled
-- strobes.  Consequently, a strobe that a cache observes while its own
-- wishbone is stalled must be an access by another master.
process(all)
    variable snoop_v : wishbone_master_out;
begin
    snoop_v := wb_master_out;
    if wb_master_in.stall = '1' then
        snoop_v.stb := '0';
    end if;
    wb_snoop <= snoop_v;
end process;

-- Top level Wishbone slaves address decoder & mux -- Top level Wishbone slaves address decoder & mux
-- --
-- From CPU to BRAM, DRAM, IO, selected on top 3 bits and dram_at_0 -- From CPU to BRAM, DRAM, IO, selected on top 3 bits and dram_at_0

Loading…
Cancel
Save