From a658766fcf415bd40aa12cc26d34ec2a686188f8 Mon Sep 17 00:00:00 2001 From: Paul Mackerras Date: Thu, 7 May 2020 20:02:21 +1000 Subject: [PATCH] Implement slbia as a dTLB/iTLB flush Slbia (with IH=7) is used in the Linux kernel to flush the ERATs (our iTLB/dTLB), so make it do that. This moves the logic to work out whether to flush a single entry or the whole TLB from dcache and icache into mmu. We now invalidate all dTLB and iTLB entries when the AP (actual pagesize) field of RB is non-zero on a tlbie[l], as well as when IS is non-zero. Signed-off-by: Paul Mackerras --- common.vhdl | 7 +++++-- dcache.vhdl | 16 +++++----------- decode1.vhdl | 1 + execute1.vhdl | 2 +- icache.vhdl | 19 +++---------------- loadstore1.vhdl | 3 ++- mmu.vhdl | 8 ++++++++ 7 files changed, 25 insertions(+), 31 deletions(-) diff --git a/common.vhdl b/common.vhdl index 79bc1bd..02f0d3f 100644 --- a/common.vhdl +++ b/common.vhdl @@ -227,6 +227,7 @@ package common is valid : std_ulogic; op : insn_type_t; -- what ld/st or m[tf]spr or TLB op to do nia : std_ulogic_vector(63 downto 0); + insn : std_ulogic_vector(31 downto 0); addr1 : std_ulogic_vector(63 downto 0); addr2 : std_ulogic_vector(63 downto 0); data : std_ulogic_vector(63 downto 0); -- data to write, unused for read @@ -242,12 +243,11 @@ package common is rc : std_ulogic; -- set for stcx. virt_mode : std_ulogic; -- do translation through TLB priv_mode : std_ulogic; -- privileged mode (MSR[PR] = 0) - spr_num : spr_num_t; -- SPR number for mfspr/mtspr end record; constant Execute1ToLoadstore1Init : Execute1ToLoadstore1Type := (valid => '0', op => OP_ILLEGAL, ci => '0', byte_reverse => '0', sign_extend => '0', update => '0', xerc => xerc_init, reserve => '0', rc => '0', virt_mode => '0', priv_mode => '0', - spr_num => 0, others => (others => '0')); + others => (others => '0')); type Loadstore1ToExecute1Type is record exception : std_ulogic; @@ -283,6 +283,7 @@ package common is type Loadstore1ToMmuType is record valid : std_ulogic; tlbie : std_ulogic; + slbia : std_ulogic; mtspr : std_ulogic; iside : std_ulogic; load : std_ulogic; @@ -305,6 +306,7 @@ package common is type MmuToDcacheType is record valid : std_ulogic; tlbie : std_ulogic; + doall : std_ulogic; tlbld : std_ulogic; addr : std_ulogic_vector(63 downto 0); pte : std_ulogic_vector(63 downto 0); @@ -320,6 +322,7 @@ package common is type MmuToIcacheType is record tlbld : std_ulogic; tlbie : std_ulogic; + doall : std_ulogic; addr : std_ulogic_vector(63 downto 0); pte : std_ulogic_vector(63 downto 0); end record; diff --git a/dcache.vhdl b/dcache.vhdl index b75d91f..a9b5c4a 100644 --- a/dcache.vhdl +++ b/dcache.vhdl @@ -210,6 +210,7 @@ architecture rtl of dcache is type reg_stage_0_t is record req : Loadstore1ToDcacheType; tlbie : std_ulogic; + doall : std_ulogic; tlbld : std_ulogic; mmu_req : std_ulogic; -- indicates source of request end record; @@ -456,11 +457,13 @@ begin r0.req.data <= m_in.pte; r0.req.byte_sel <= (others => '1'); r0.tlbie <= m_in.tlbie; + r0.doall <= m_in.doall; r0.tlbld <= m_in.tlbld; r0.mmu_req <= '1'; else r0.req <= d_in; r0.tlbie <= '0'; + r0.doall <= '0'; r0.tlbld <= '0'; r0.mmu_req <= '0'; end if; @@ -572,7 +575,6 @@ begin tlb_update : process(clk) variable tlbie : std_ulogic; - variable tlbia : std_ulogic; variable tlbwe : std_ulogic; variable repl_way : tlb_way_t; variable eatag : tlb_tag_t; @@ -580,17 +582,9 @@ begin variable pteset : tlb_way_ptes_t; begin if rising_edge(clk) then - tlbie := '0'; - tlbia := '0'; + tlbie := r0_valid and r0.tlbie; tlbwe := r0_valid and r0.tlbld; - if r0_valid = '1' and r0.tlbie = '1' then - if r0.req.addr(11 downto 10) /= "00" then - tlbia := '1'; - else - tlbie := '1'; - end if; - end if; - if rst = '1' or tlbia = '1' then + if rst = '1' or (tlbie = '1' and r0.doall = '1') then -- clear all valid bits at once for i in tlb_index_t loop dtlb_valids(i) <= (others => '0'); diff --git a/decode1.vhdl b/decode1.vhdl index 90a5980..cd17d1e 100644 --- a/decode1.vhdl +++ b/decode1.vhdl @@ -282,6 +282,7 @@ architecture behaviour of decode1 is 2#0010111010# => (ALU, OP_PRTY, NONE, NONE, RS, RA, '0', '0', '0', '0', ZERO, '0', is8B, '0', '0', '0', '0', '0', '0', NONE, '0', '0'), -- prtyd 2#0010011010# => (ALU, OP_PRTY, NONE, NONE, RS, RA, '0', '0', '0', '0', ZERO, '0', is4B, '0', '0', '0', '0', '0', '0', NONE, '0', '0'), -- prtyw -- 2#0010000000# setb + 2#0111110010# => (LDST, OP_TLBIE, NONE, NONE, NONE, NONE, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', NONE, '0', '0'), -- slbia 2#0000011011# => (ALU, OP_SHL, NONE, RB, RS, RA, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', RC, '0', '0'), -- sld 2#0000011000# => (ALU, OP_SHL, NONE, RB, RS, RA, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '1', '0', RC, '0', '0'), -- slw 2#1100011010# => (ALU, OP_SHR, NONE, RB, RS, RA, '0', '0', '0', '0', ZERO, '1', NONE, '0', '0', '0', '0', '0', '1', RC, '0', '0'), -- srad diff --git a/execute1.vhdl b/execute1.vhdl index 78361c2..688f93c 100644 --- a/execute1.vhdl +++ b/execute1.vhdl @@ -1023,7 +1023,7 @@ begin lv.xerc := v.e.xerc; lv.reserve := e_in.reserve; lv.rc := e_in.rc; - lv.spr_num := decode_spr_num(e_in.insn); + lv.insn := e_in.insn; -- decode l*cix and st*cix instructions here if e_in.insn(31 downto 26) = "011111" and e_in.insn(10 downto 9) = "11" and e_in.insn(5 downto 1) = "10101" then diff --git a/icache.vhdl b/icache.vhdl index 7d7973d..86c2746 100644 --- a/icache.vhdl +++ b/icache.vhdl @@ -435,32 +435,19 @@ begin -- iTLB update itlb_update: process(clk) - variable tlbie : std_ulogic; - variable tlbia : std_ulogic; - variable tlbwe : std_ulogic; variable wr_index : tlb_index_t; begin if rising_edge(clk) then - tlbie := '0'; - tlbia := '0'; - tlbwe := m_in.tlbld; - if m_in.tlbie = '1' then - if m_in.addr(11 downto 10) /= "00" then - tlbia := '1'; - else - tlbie := '1'; - end if; - end if; wr_index := hash_ea(m_in.addr); - if rst = '1' or tlbia = '1' then + if rst = '1' or (m_in.tlbie = '1' and m_in.doall = '1') then -- clear all valid bits for i in tlb_index_t loop itlb_valids(i) <= '0'; end loop; - elsif tlbie = '1' then + elsif m_in.tlbie = '1' then -- clear entry regardless of hit or miss itlb_valids(wr_index) <= '0'; - elsif tlbwe = '1' then + elsif m_in.tlbld = '1' then itlb_tags(wr_index) <= m_in.addr(63 downto TLB_LG_PGSZ + TLB_BITS); itlb_ptes(wr_index) <= m_in.pte; itlb_valids(wr_index) <= '1'; diff --git a/loadstore1.vhdl b/loadstore1.vhdl index b7b56d4..251f529 100644 --- a/loadstore1.vhdl +++ b/loadstore1.vhdl @@ -166,7 +166,7 @@ begin mfspr := '0'; mmu_mtspr := '0'; itlb_fault := '0'; - sprn := std_ulogic_vector(to_unsigned(l_in.spr_num, 10)); + sprn := std_ulogic_vector(to_unsigned(decode_spr_num(l_in.insn), 10)); sprval := (others => '0'); -- avoid inferred latches exception := '0'; dsisr := (others => '0'); @@ -468,6 +468,7 @@ begin m_out.mtspr <= mmu_mtspr; m_out.sprn <= sprn(3 downto 0); m_out.addr <= addr; + m_out.slbia <= l_in.insn(7); m_out.rs <= l_in.data; -- Update outputs to writeback diff --git a/mmu.vhdl b/mmu.vhdl index e770d99..8415443 100644 --- a/mmu.vhdl +++ b/mmu.vhdl @@ -168,6 +168,7 @@ begin variable tlb_load : std_ulogic; variable itlb_load : std_ulogic; variable tlbie_req : std_ulogic; + variable inval_all : std_ulogic; variable rts : unsigned(5 downto 0); variable mbits : unsigned(5 downto 0); variable pgtable_addr : std_ulogic_vector(63 downto 0); @@ -191,6 +192,7 @@ begin tlb_load := '0'; itlb_load := '0'; tlbie_req := '0'; + inval_all := '0'; -- Radix tree data structures in memory are big-endian, -- so we need to byte-swap them @@ -217,6 +219,10 @@ begin if l_in.tlbie = '1' then dcreq := '1'; tlbie_req := '1'; + -- Invalidate all iTLB/dTLB entries for tlbie with + -- RB[IS] != 0 or RB[AP] != 0, or for slbia + inval_all := l_in.slbia or l_in.addr(11) or l_in.addr(10) or + l_in.addr(7) or l_in.addr(6) or l_in.addr(5); v.state := TLB_WAIT; else v.valid := '1'; @@ -356,12 +362,14 @@ begin d_out.valid <= dcreq; d_out.tlbie <= tlbie_req; + d_out.doall <= inval_all; d_out.tlbld <= tlb_load; d_out.addr <= addr; d_out.pte <= tlb_data; i_out.tlbld <= itlb_load; i_out.tlbie <= tlbie_req; + i_out.doall <= inval_all; i_out.addr <= addr; i_out.pte <= tlb_data;