dcache: Simplify addressing of the dcache TLB

Instead of having TLB invalidation and TLB load requests come through
the dcache main path, these operations are now done in one cycle
entirely based on signals from the MMU, and don't involve the TLB read
path or the dcache state machine at all.  So that we know which way of
the TLB to affect for invalidations, loadstore1 now sends down a "TLB
probe" operation for tlbie instructions which goes through the dcache
pipeline and sets the r1.tlb_hit_* fields which are used in the
subsequent invalidation operation from the MMU (if it is a single-page
invalidation).  TLB load operations write to the way identified by
r1.tlb_victim, which was set on the TLB miss that triggered the TLB
reload.

Since we are writing just one way of the TLB tags now, rather than
writing all ways with one way's value changed, we now pad each way's
tag to a multiple of 8 bits so that byte write-enables can be used to
select which way gets written.

Signed-off-by: Paul Mackerras <paulus@ozlabs.org>
pull/441/head
Paul Mackerras 3 weeks ago
parent 5168242cd5
commit c938246cc8

@ -675,6 +675,7 @@ package common is
atomic_last : std_ulogic; atomic_last : std_ulogic;
virt_mode : std_ulogic; virt_mode : std_ulogic;
priv_mode : std_ulogic; priv_mode : std_ulogic;
tlb_probe : std_ulogic;
addr : std_ulogic_vector(63 downto 0); addr : std_ulogic_vector(63 downto 0);
data : std_ulogic_vector(63 downto 0); -- valid the cycle after .valid = 1 data : std_ulogic_vector(63 downto 0); -- valid the cycle after .valid = 1
byte_sel : std_ulogic_vector(7 downto 0); byte_sel : std_ulogic_vector(7 downto 0);

@ -135,7 +135,8 @@ architecture rtl of dcache is
constant TLB_SET_BITS : natural := log2(TLB_SET_SIZE); constant TLB_SET_BITS : natural := log2(TLB_SET_SIZE);
constant TLB_WAY_BITS : natural := maximum(log2(TLB_NUM_WAYS), 1); constant TLB_WAY_BITS : natural := maximum(log2(TLB_NUM_WAYS), 1);
constant TLB_EA_TAG_BITS : natural := 64 - (TLB_LG_PGSZ + TLB_SET_BITS); constant TLB_EA_TAG_BITS : natural := 64 - (TLB_LG_PGSZ + TLB_SET_BITS);
constant TLB_TAG_WAY_BITS : natural := TLB_NUM_WAYS * TLB_EA_TAG_BITS; constant TLB_EA_TAG_WIDTH : natural := TLB_EA_TAG_BITS + 7 - ((TLB_EA_TAG_BITS + 7) mod 8);
constant TLB_TAG_WAY_BITS : natural := TLB_NUM_WAYS * TLB_EA_TAG_WIDTH;
constant TLB_PTE_BITS : natural := 64; constant TLB_PTE_BITS : natural := 64;
constant TLB_PTE_WAY_BITS : natural := TLB_NUM_WAYS * TLB_PTE_BITS; constant TLB_PTE_WAY_BITS : natural := TLB_NUM_WAYS * TLB_PTE_BITS;


@ -294,9 +295,6 @@ architecture rtl of dcache is
-- Stage 0 register, basically contains just the latched request -- Stage 0 register, basically contains just the latched request
type reg_stage_0_t is record type reg_stage_0_t is record
req : Loadstore1ToDcacheType; req : Loadstore1ToDcacheType;
tlbie : std_ulogic; -- indicates a tlbie request (from MMU)
doall : std_ulogic; -- with tlbie, indicates flush whole TLB
tlbld : std_ulogic; -- indicates a TLB load request (from MMU)
mmu_req : std_ulogic; -- indicates source of request mmu_req : std_ulogic; -- indicates source of request
d_valid : std_ulogic; -- indicates req.data is valid now d_valid : std_ulogic; -- indicates req.data is valid now
end record; end record;
@ -356,6 +354,7 @@ architecture rtl of dcache is
-- TLB hit state -- TLB hit state
tlb_hit : std_ulogic; tlb_hit : std_ulogic;
tlb_hit_way : tlb_way_sig_t; tlb_hit_way : tlb_way_sig_t;
tlb_hit_ways : tlb_expand_t;
tlb_hit_index : tlb_index_sig_t; tlb_hit_index : tlb_index_sig_t;
tlb_victim : tlb_way_sig_t; tlb_victim : tlb_way_sig_t;
ls_tlb_hit : std_ulogic; ls_tlb_hit : std_ulogic;
@ -566,19 +565,10 @@ architecture rtl of dcache is
function read_tlb_tag(way: tlb_way_t; tags: tlb_way_tags_t) return tlb_tag_t is function read_tlb_tag(way: tlb_way_t; tags: tlb_way_tags_t) return tlb_tag_t is
variable j : integer; variable j : integer;
begin begin
j := way * TLB_EA_TAG_BITS; j := way * TLB_EA_TAG_WIDTH;
return tags(j + TLB_EA_TAG_BITS - 1 downto j); return tags(j + TLB_EA_TAG_BITS - 1 downto j);
end; end;


-- Write a TLB tag to a TLB tag memory row
procedure write_tlb_tag(way: tlb_way_t; tags: inout tlb_way_tags_t;
tag: tlb_tag_t) is
variable j : integer;
begin
j := way * TLB_EA_TAG_BITS;
tags(j + TLB_EA_TAG_BITS - 1 downto j) := tag;
end;

-- Read a PTE from a TLB PTE memory row -- Read a PTE from a TLB PTE memory row
function read_tlb_pte(way: tlb_way_t; ptes: tlb_way_ptes_t) return tlb_pte_t is function read_tlb_pte(way: tlb_way_t; ptes: tlb_way_ptes_t) return tlb_pte_t is
variable j : integer; variable j : integer;
@ -587,13 +577,6 @@ architecture rtl of dcache is
return ptes(j + TLB_PTE_BITS - 1 downto j); return ptes(j + TLB_PTE_BITS - 1 downto j);
end; end;


procedure write_tlb_pte(way: tlb_way_t; ptes: inout tlb_way_ptes_t; newpte: tlb_pte_t) is
variable j : integer;
begin
j := way * TLB_PTE_BITS;
ptes(j + TLB_PTE_BITS - 1 downto j) := newpte;
end;

begin begin


assert LINE_SIZE mod ROW_SIZE = 0 report "LINE_SIZE not multiple of ROW_SIZE" severity FAILURE; assert LINE_SIZE mod ROW_SIZE = 0 report "LINE_SIZE not multiple of ROW_SIZE" severity FAILURE;
@ -623,26 +606,19 @@ begin
if m_in.valid = '1' then if m_in.valid = '1' then
r.req := Loadstore1ToDcacheInit; r.req := Loadstore1ToDcacheInit;
r.req.valid := '1'; r.req.valid := '1';
r.req.load := not (m_in.tlbie or m_in.tlbld); r.req.load := '1';
r.req.priv_mode := '1'; r.req.priv_mode := '1';
r.req.addr := m_in.addr; r.req.addr := m_in.addr;
r.req.data := m_in.pte;
r.req.byte_sel := (others => '1'); r.req.byte_sel := (others => '1');
r.tlbie := m_in.tlbie;
r.doall := m_in.doall;
r.tlbld := m_in.tlbld;
r.mmu_req := '1'; r.mmu_req := '1';
r.d_valid := '1'; r.d_valid := '1';
else else
r.req := d_in; r.req := d_in;
r.req.data := (others => '0'); r.req.data := (others => '0');
r.tlbie := '0';
r.doall := '0';
r.tlbld := '0';
r.mmu_req := '0'; r.mmu_req := '0';
r.d_valid := '0'; r.d_valid := '0';
end if; end if;
if r.req.valid = '1' and r.doall = '0' then if r.req.valid = '1' then
assert not is_X(r.req.addr) severity failure; assert not is_X(r.req.addr) severity failure;
end if; end if;
if rst = '1' then if rst = '1' then
@ -809,48 +785,39 @@ begin
end process; end process;


tlb_update : process(clk) tlb_update : process(clk)
variable tlbie : std_ulogic; variable tlb_wr_index : tlb_index_sig_t;
variable tlbwe : std_ulogic; variable j, k : integer;
variable repl_way : tlb_way_sig_t;
variable eatag : tlb_tag_t;
variable tagset : tlb_way_tags_t;
variable pteset : tlb_way_ptes_t;
begin begin
if rising_edge(clk) then if rising_edge(clk) then
tlbie := r0_valid and r0.tlbie; tlb_wr_index := unsigned(m_in.addr(TLB_LG_PGSZ + TLB_SET_BITS - 1
tlbwe := r0_valid and r0.tlbld; downto TLB_LG_PGSZ));
ev.dtlb_miss_resolved <= tlbwe; ev.dtlb_miss_resolved <= m_in.tlbld;
if rst = '1' or (tlbie = '1' and r0.doall = '1') then if rst = '1' or (m_in.tlbie = '1' and m_in.doall = '1') then
-- clear all valid bits at once -- clear all valid bits at once
for i in tlb_index_t loop for i in tlb_index_t loop
dtlb_valids(i) <= (others => '0'); dtlb_valids(i) <= (others => '0');
end loop; end loop;
elsif tlbie = '1' then elsif m_in.tlbie = '1' then
for i in tlb_way_t loop for i in tlb_way_t loop
if tlb_hit_expand(i) = '1' then if r1.tlb_hit_ways(i) = '1' then
assert not is_X(tlb_req_index); assert not is_X(tlb_wr_index);
dtlb_valids(to_integer(tlb_req_index))(i) <= '0'; dtlb_valids(to_integer(tlb_wr_index))(i) <= '0';
end if; end if;
end loop; end loop;
elsif tlbwe = '1' then elsif m_in.tlbld = '1' then
assert not is_X(tlb_req_index); assert not is_X(tlb_wr_index);
repl_way := to_unsigned(0, TLB_WAY_BITS); assert not is_X(r1.tlb_victim);
if TLB_NUM_WAYS > 1 then for way in 0 to TLB_NUM_WAYS - 1 loop
if tlb_hit = '1' then if TLB_NUM_WAYS = 1 or way = to_integer(unsigned(r1.tlb_victim)) then
repl_way := tlb_hit_way; j := way * TLB_EA_TAG_WIDTH;
else dtlb_tags(to_integer(tlb_wr_index))(j + TLB_EA_TAG_WIDTH - 1 downto j) <=
repl_way := unsigned(r1.tlb_victim); (TLB_EA_TAG_WIDTH - 1 downto TLB_EA_TAG_BITS => '0') &
m_in.addr(63 downto TLB_LG_PGSZ + TLB_SET_BITS);
k := way * TLB_PTE_BITS;
dtlb_ptes(to_integer(tlb_wr_index))(k + TLB_PTE_BITS - 1 downto k) <= m_in.pte;
dtlb_valids(to_integer(tlb_wr_index))(way) <= '1';
end if; end if;
assert not is_X(repl_way); end loop;
end if;
eatag := r0.req.addr(63 downto TLB_LG_PGSZ + TLB_SET_BITS);
tagset := tlb_tag_way;
write_tlb_tag(to_integer(repl_way), tagset, eatag);
dtlb_tags(to_integer(tlb_req_index)) <= tagset;
pteset := tlb_pte_way;
write_tlb_pte(to_integer(repl_way), pteset, r0.req.data);
dtlb_ptes(to_integer(tlb_req_index)) <= pteset;
dtlb_valids(to_integer(tlb_req_index))(to_integer(repl_way)) <= '1';
end if; end if;
end if; end if;
end process; end process;
@ -914,10 +881,10 @@ begin
if rising_edge(clk) then if rising_edge(clk) then
if r0_stall = '1' then if r0_stall = '1' then
index := req_index; index := req_index;
valid := r0.req.valid and not (r0.tlbie or r0.tlbld); valid := r0.req.valid;
elsif m_in.valid = '1' then elsif m_in.valid = '1' then
index := get_index(m_in.addr); index := get_index(m_in.addr);
valid := not (m_in.tlbie or m_in.tlbld); valid := '1';
else else
index := get_index(d_in.addr); index := get_index(d_in.addr);
valid := d_in.valid; valid := d_in.valid;
@ -999,7 +966,7 @@ begin
dawr_match := r0.req.dawr_match; dawr_match := r0.req.dawr_match;
end if; end if;


go := r0_valid and not (r0.tlbie or r0.tlbld) and not r1.ls_error; go := r0_valid and not r1.ls_error;
if is_X(ra) then if is_X(ra) then
go := '0'; go := '0';
end if; end if;
@ -1173,6 +1140,12 @@ begin
else else
req_op_nop <= '1'; req_op_nop <= '1';
end if; end if;
elsif r0.req.tlb_probe = '1' then
-- TLB probe is sent down by loadstore1 before sending a TLB
-- invalidation to mmu, to get r1.tlb_hit_* set correctly
-- (for a single-page invalidation) for the address.
-- It doesn't require r1.ls_valid to be set on completion,
-- so there is nothing else to do here.
elsif access_ok = '0' then elsif access_ok = '0' then
req_op_bad <= '1'; req_op_bad <= '1';
elsif r0.req.flush = '1' then elsif r0.req.flush = '1' then
@ -1198,7 +1171,7 @@ begin
if r0_stall = '0' then if r0_stall = '0' then
if m_in.valid = '1' then if m_in.valid = '1' then
early_req_row <= get_row(m_in.addr); early_req_row <= get_row(m_in.addr);
early_rd_valid <= not (m_in.tlbie or m_in.tlbld); early_rd_valid <= '1';
else else
early_req_row <= get_row(d_in.addr); early_req_row <= get_row(d_in.addr);
early_rd_valid <= d_in.valid and d_in.load; early_rd_valid <= d_in.valid and d_in.load;
@ -1417,13 +1390,23 @@ begin
end if; end if;


-- Record TLB hit information for updating TLB PLRU -- Record TLB hit information for updating TLB PLRU
r1.tlb_hit <= tlb_hit; -- and for invalidating or updating TLB contents
r1.tlb_hit_way <= tlb_hit_way; if r0_valid = '1' then
r1.tlb_hit_index <= tlb_req_index; r1.tlb_hit <= tlb_hit;
r1.tlb_hit_way <= tlb_hit_way;
r1.tlb_hit_ways <= tlb_hit_expand;
r1.tlb_hit_index <= tlb_req_index;
else
r1.tlb_hit <= '0';
end if;
-- determine victim way in the TLB in the cycle after -- determine victim way in the TLB in the cycle after
-- we detect the TLB miss -- we detect the TLB miss
if r1.ls_error = '1' then if r1.ls_error = '1' then
r1.tlb_victim <= unsigned(tlb_plru_victim); if r1.tlb_hit = '0' then
r1.tlb_victim <= unsigned(tlb_plru_victim);
else
r1.tlb_victim <= r1.tlb_hit_way;
end if;
end if; end if;


end if; end if;
@ -1482,9 +1465,7 @@ begin
r1.stcx_fail <= '0'; r1.stcx_fail <= '0';


r1.ls_valid <= (req_op_load_hit or req_op_nop) and not r0.mmu_req; r1.ls_valid <= (req_op_load_hit or req_op_nop) and not r0.mmu_req;
-- complete tlbies and TLB loads in the third cycle r1.mmu_done <= req_op_load_hit and r0.mmu_req;
r1.mmu_done <= (r0_valid and (r0.tlbie or r0.tlbld)) or
(req_op_load_hit and r0.mmu_req);


-- Clear the reservation if another entity writes to that line -- Clear the reservation if another entity writes to that line
if kill_rsrv = '1' then if kill_rsrv = '1' then
@ -1582,7 +1563,7 @@ begin
r1.full <= req_op_load_miss or req_op_store or req_op_flush or req_op_sync; r1.full <= req_op_load_miss or req_op_store or req_op_flush or req_op_sync;
end if; end if;
end if; end if;
if r0_valid = '1' and r0.tlbld = '1' then if m_in.tlbld = '1' or m_in.tlbie = '1' then
r1.ls_tlb_hit <= '0'; r1.ls_tlb_hit <= '0';
end if; end if;



@ -712,8 +712,8 @@ begin
v.mmu_op := '1'; v.mmu_op := '1';
when others => when others =>
end case; end case;
v.dc_req := l_in.valid and (v.load or v.store or v.sync or v.dcbz) and not v.align_intr and v.dc_req := l_in.valid and (v.load or v.store or v.sync or v.dcbz or v.tlbie) and
not hash_nop; not v.align_intr and not hash_nop;
v.incomplete := v.dc_req and v.two_dwords; v.incomplete := v.dc_req and v.two_dwords;


-- Work out controls for load and store formatting -- Work out controls for load and store formatting
@ -873,7 +873,7 @@ begin
dawrx_match_enable(r3.dawrx(i), r1.req.virt_mode, dawrx_match_enable(r3.dawrx(i), r1.req.virt_mode,
r1.req.priv_mode, r1.req.store) then r1.req.priv_mode, r1.req.store) then
dawr_match := r1.req.valid and r1.req.dc_req and not r3.dawr_upd and dawr_match := r1.req.valid and r1.req.dc_req and not r3.dawr_upd and
not (r1.req.touch or r1.req.sync or r1.req.flush); not (r1.req.touch or r1.req.sync or r1.req.flush or r1.req.tlbie);
end if; end if;
end loop; end loop;
stage1_dawr_match <= dawr_match; stage1_dawr_match <= dawr_match;
@ -918,7 +918,7 @@ begin
v.req.store_data := store_data; v.req.store_data := store_data;
v.req.dawr_intr := dawr_match; v.req.dawr_intr := dawr_match;
v.wait_dc := r1.req.valid and r1.req.dc_req and not r1.req.load_sp and v.wait_dc := r1.req.valid and r1.req.dc_req and not r1.req.load_sp and
not r1.req.incomplete and not r1.req.hashcmp; not r1.req.incomplete and not r1.req.hashcmp and not r1.req.tlbie;
v.wait_mmu := r1.req.valid and r1.req.mmu_op; v.wait_mmu := r1.req.valid and r1.req.mmu_op;
if r1.req.valid = '1' and (r1.req.align_intr or r1.req.hashcmp) = '1' then if r1.req.valid = '1' and (r1.req.align_intr or r1.req.hashcmp) = '1' then
v.busy := '1'; v.busy := '1';
@ -1263,6 +1263,7 @@ begin
d_out.sync <= stage1_req.sync; d_out.sync <= stage1_req.sync;
d_out.nc <= stage1_req.nc; d_out.nc <= stage1_req.nc;
d_out.reserve <= stage1_req.reserve; d_out.reserve <= stage1_req.reserve;
d_out.tlb_probe <= stage1_req.tlbie;
d_out.atomic_qw <= stage1_req.atomic_qw; d_out.atomic_qw <= stage1_req.atomic_qw;
d_out.atomic_first <= stage1_req.atomic_first; d_out.atomic_first <= stage1_req.atomic_first;
d_out.atomic_last <= stage1_req.atomic_last; d_out.atomic_last <= stage1_req.atomic_last;
@ -1279,6 +1280,7 @@ begin
d_out.sync <= r2.req.sync; d_out.sync <= r2.req.sync;
d_out.nc <= r2.req.nc; d_out.nc <= r2.req.nc;
d_out.reserve <= r2.req.reserve; d_out.reserve <= r2.req.reserve;
d_out.tlb_probe <= r2.req.tlbie;
d_out.atomic_qw <= r2.req.atomic_qw; d_out.atomic_qw <= r2.req.atomic_qw;
d_out.atomic_first <= r2.req.atomic_first; d_out.atomic_first <= r2.req.atomic_first;
d_out.atomic_last <= r2.req.atomic_last; d_out.atomic_last <= r2.req.atomic_last;

@ -28,7 +28,6 @@ architecture behave of mmu is


type state_t is (IDLE, type state_t is (IDLE,
DO_TLBIE, DO_TLBIE,
TLB_WAIT,
PART_TBL_READ, PART_TBL_READ,
PART_TBL_WAIT, PART_TBL_WAIT,
PART_TBL_DONE, PART_TBL_DONE,
@ -195,7 +194,6 @@ begin
variable v : reg_stage_t; variable v : reg_stage_t;
variable dcreq : std_ulogic; variable dcreq : std_ulogic;
variable tlb_load : std_ulogic; variable tlb_load : std_ulogic;
variable itlb_load : std_ulogic;
variable tlbie_req : std_ulogic; variable tlbie_req : std_ulogic;
variable ptbl_rd : std_ulogic; variable ptbl_rd : std_ulogic;
variable prtbl_rd : std_ulogic; variable prtbl_rd : std_ulogic;
@ -225,7 +223,6 @@ begin
v.perm_err := '0'; v.perm_err := '0';
v.rc_error := '0'; v.rc_error := '0';
tlb_load := '0'; tlb_load := '0';
itlb_load := '0';
tlbie_req := '0'; tlbie_req := '0';
v.inval_all := '0'; v.inval_all := '0';
ptbl_rd := '0'; ptbl_rd := '0';
@ -309,14 +306,8 @@ begin
end if; end if;


when DO_TLBIE => when DO_TLBIE =>
dcreq := '1';
tlbie_req := '1'; tlbie_req := '1';
v.state := TLB_WAIT; v.state := RADIX_FINISH;

when TLB_WAIT =>
if d_in.done = '1' then
v.state := RADIX_FINISH;
end if;


when PART_TBL_READ => when PART_TBL_READ =>
dcreq := '1'; dcreq := '1';
@ -438,20 +429,14 @@ begin


when RADIX_LOAD_TLB => when RADIX_LOAD_TLB =>
tlb_load := '1'; tlb_load := '1';
if r.iside = '0' then v.state := RADIX_FINISH;
dcreq := '1';
v.state := TLB_WAIT;
else
itlb_load := '1';
v.state := IDLE;
end if;


when RADIX_FINISH => when RADIX_FINISH =>
v.state := IDLE; v.state := IDLE;


end case; end case;


if v.state = RADIX_FINISH or (v.state = RADIX_LOAD_TLB and r.iside = '1') then if v.state = RADIX_FINISH then
v.err := v.invalid or v.badtree or v.segerror or v.perm_err or v.rc_error; v.err := v.invalid or v.badtree or v.segerror or v.perm_err or v.rc_error;
v.done := not v.err; v.done := not v.err;
end if; end if;
@ -505,11 +490,11 @@ begin
d_out.valid <= dcreq; d_out.valid <= dcreq;
d_out.tlbie <= tlbie_req; d_out.tlbie <= tlbie_req;
d_out.doall <= r.inval_all; d_out.doall <= r.inval_all;
d_out.tlbld <= tlb_load; d_out.tlbld <= tlb_load and not r.iside;
d_out.addr <= addr; d_out.addr <= addr;
d_out.pte <= tlb_data; d_out.pte <= tlb_data;


i_out.tlbld <= itlb_load; i_out.tlbld <= tlb_load and r.iside;
i_out.tlbie <= tlbie_req; i_out.tlbie <= tlbie_req;
i_out.doall <= r.inval_all; i_out.doall <= r.inval_all;
i_out.addr <= addr; i_out.addr <= addr;

Loading…
Cancel
Save