From 01046527ba0e720a3f2a97e4d837a5d12ae68061 Mon Sep 17 00:00:00 2001 From: Paul Mackerras Date: Tue, 28 Apr 2020 14:54:22 +1000 Subject: [PATCH] MMU: Do radix page table walks on iTLB misses This hooks up the connections so that an OP_FETCH_FAILED coming down to loadstore1 will get sent to the MMU for it to do a radix tree walk for the instruction address. The MMU then sends the resulting PTE to the icache module to be installed in the iTLB. If no valid PTE can be found, the MMU sends an error signal back to loadstore1 which sends it on to execute1 to generate an ISI. Signed-off-by: Paul Mackerras --- common.vhdl | 4 ++++ execute1.vhdl | 5 ++++- loadstore1.vhdl | 42 ++++++++++++++++++++++-------------- mmu.vhdl | 57 +++++++++++++++++++++++++++++++++---------------- 4 files changed, 73 insertions(+), 35 deletions(-) diff --git a/common.vhdl b/common.vhdl index ba8aab3..79bc1bd 100644 --- a/common.vhdl +++ b/common.vhdl @@ -251,6 +251,10 @@ package common is type Loadstore1ToExecute1Type is record exception : std_ulogic; + invalid : std_ulogic; + perm_error : std_ulogic; + rc_error : std_ulogic; + badtree : std_ulogic; segment_fault : std_ulogic; instr_fault : std_ulogic; end record; diff --git a/execute1.vhdl b/execute1.vhdl index 71c79ee..78361c2 100644 --- a/execute1.vhdl +++ b/execute1.vhdl @@ -991,7 +991,10 @@ begin end if; else if l_in.segment_fault = '0' then - ctrl_tmp.srr1(63 - 33) <= '1'; + ctrl_tmp.srr1(63 - 33) <= l_in.invalid; + ctrl_tmp.srr1(63 - 35) <= l_in.perm_error; -- noexec fault + ctrl_tmp.srr1(63 - 44) <= l_in.badtree; + ctrl_tmp.srr1(63 - 45) <= l_in.rc_error; ctrl_tmp.irq_nia <= std_logic_vector(to_unsigned(16#400#, 64)); else ctrl_tmp.irq_nia <= std_logic_vector(to_unsigned(16#480#, 64)); diff --git a/loadstore1.vhdl b/loadstore1.vhdl index 666cf4e..b7b56d4 100644 --- a/loadstore1.vhdl +++ b/loadstore1.vhdl @@ -41,8 +41,7 @@ architecture behave of loadstore1 is ACK_WAIT, -- waiting for ack from dcache LD_UPDATE, -- writing rA with computed addr on load MMU_LOOKUP, -- waiting for MMU to look up translation - TLBIE_WAIT, -- waiting for MMU to finish doing a tlbie - DO_ISI + TLBIE_WAIT -- waiting for MMU to finish doing a tlbie ); type reg_stage_t is record @@ -231,6 +230,7 @@ begin case r.state is when IDLE => if l_in.valid = '1' then + v.addr := lsu_sum; v.load := '0'; v.dcbz := '0'; v.tlbie := '0'; @@ -278,14 +278,17 @@ begin mmu_mtspr := '1'; end if; when OP_FETCH_FAILED => - -- for now, always signal an ISI in the next cycle + -- send it to the MMU to do the radix walk + addr := l_in.nia; + v.addr := l_in.nia; v.instr_fault := '1'; - v.state := DO_ISI; + mmureq := '1'; + stall := '1'; + v.state := MMU_LOOKUP; when others => assert false report "unknown op sent to loadstore1"; end case; - v.addr := lsu_sum; v.write_reg := l_in.write_reg; v.length := l_in.length; v.byte_reverse := l_in.byte_reverse; @@ -403,12 +406,19 @@ begin if m_in.done = '1' then if m_in.invalid = '0' and m_in.perm_error = '0' and m_in.rc_error = '0' and m_in.badtree = '0' and m_in.segerr = '0' then - -- retry the request now that the MMU has installed a TLB entry - req := '1'; - if two_dwords = '1' and r.dwords_done = '0' then - v.state := SECOND_REQ; + if r.instr_fault = '0' then + -- retry the request now that the MMU has installed a TLB entry + req := '1'; + if two_dwords = '1' and r.dwords_done = '0' then + v.state := SECOND_REQ; + else + v.state := ACK_WAIT; + end if; else - v.state := ACK_WAIT; + -- nothing to do, the icache retries automatically + stall := '0'; + done := '1'; + v.state := IDLE; end if; else exception := '1'; @@ -435,9 +445,6 @@ begin v.state := IDLE; done := '1'; - when DO_ISI => - exception := '1'; - v.state := IDLE; end case; -- Update outputs to dcache @@ -454,7 +461,7 @@ begin -- Update outputs to MMU m_out.valid <= mmureq; - m_out.iside <= itlb_fault; + m_out.iside <= v.instr_fault; m_out.load <= r.load; m_out.priv <= r.priv_mode; m_out.tlbie <= v.tlbie; @@ -486,11 +493,14 @@ begin -- update exception info back to execute1 e_out.exception <= exception; - e_out.segment_fault <= '0'; e_out.instr_fault <= r.instr_fault; + e_out.invalid <= m_in.invalid; + e_out.badtree <= m_in.badtree; + e_out.perm_error <= m_in.perm_error; + e_out.rc_error <= m_in.rc_error; + e_out.segment_fault <= m_in.segerr; if exception = '1' and r.instr_fault = '0' then v.dar := addr; - e_out.segment_fault <= m_in.segerr; if m_in.segerr = '0' then v.dsisr := dsisr; end if; diff --git a/mmu.vhdl b/mmu.vhdl index e26c5a7..e770d99 100644 --- a/mmu.vhdl +++ b/mmu.vhdl @@ -38,6 +38,7 @@ architecture behave of mmu is type reg_stage_t is record -- latched request from loadstore1 valid : std_ulogic; + iside : std_ulogic; store : std_ulogic; priv : std_ulogic; addr : std_ulogic_vector(63 downto 0); @@ -165,15 +166,18 @@ begin variable dcreq : std_ulogic; variable done : std_ulogic; variable tlb_load : std_ulogic; + variable itlb_load : std_ulogic; variable tlbie_req : std_ulogic; variable rts : unsigned(5 downto 0); variable mbits : unsigned(5 downto 0); variable pgtable_addr : std_ulogic_vector(63 downto 0); variable pte : std_ulogic_vector(63 downto 0); - variable data : std_ulogic_vector(63 downto 0); + variable tlb_data : std_ulogic_vector(63 downto 0); variable nonzero : std_ulogic; variable perm_ok : std_ulogic; variable rc_ok : std_ulogic; + variable addr : std_ulogic_vector(63 downto 0); + variable data : std_ulogic_vector(63 downto 0); begin v := r; v.valid := '0'; @@ -185,6 +189,7 @@ begin v.perm_err := '0'; v.rc_error := '0'; tlb_load := '0'; + itlb_load := '0'; tlbie_req := '0'; -- Radix tree data structures in memory are big-endian, @@ -206,7 +211,8 @@ begin if l_in.valid = '1' then v.addr := l_in.addr; - v.store := not l_in.load; + v.iside := l_in.iside; + v.store := not (l_in.load or l_in.iside); v.priv := l_in.priv; if l_in.tlbie = '1' then dcreq := '1'; @@ -262,7 +268,13 @@ begin -- check permissions and RC bits perm_ok := '0'; if r.priv = '1' or data(3) = '0' then - perm_ok := data(1) or (data(2) and not r.store); + if r.iside = '0' then + perm_ok := data(1) or (data(2) and not r.store); + else + -- no IAMR, so no KUEP support for now + -- deny execute permission if cache inhibited + perm_ok := data(0) and not data(5); + end if; end if; rc_ok := data(8) and (data(7) or not r.store); if perm_ok = '1' and rc_ok = '1' then @@ -298,8 +310,14 @@ begin when RADIX_LOAD_TLB => tlb_load := '1'; - dcreq := '1'; - v.state := TLB_WAIT; + if r.iside = '0' then + dcreq := '1'; + v.state := TLB_WAIT; + else + itlb_load := '1'; + done := '1'; + v.state := IDLE; + end if; when RADIX_ERROR => done := '1'; @@ -318,6 +336,17 @@ begin rin <= v; -- drive outputs + if tlbie_req = '1' then + addr := l_in.addr; + tlb_data := l_in.rs; + elsif tlb_load = '1' then + addr := r.addr(63 downto 12) & x"000"; + tlb_data := pte; + else + addr := pgtable_addr; + tlb_data := (others => '0'); + end if; + l_out.done <= done; l_out.invalid <= r.invalid; l_out.badtree <= r.badtree; @@ -328,21 +357,13 @@ begin d_out.valid <= dcreq; d_out.tlbie <= tlbie_req; d_out.tlbld <= tlb_load; - if tlbie_req = '1' then - d_out.addr <= l_in.addr; - d_out.pte <= l_in.rs; - elsif tlb_load = '1' then - d_out.addr <= r.addr(63 downto 12) & x"000"; - d_out.pte <= pte; - else - d_out.addr <= pgtable_addr; - d_out.pte <= (others => '0'); - end if; + d_out.addr <= addr; + d_out.pte <= tlb_data; - i_out.tlbld <= '0'; + i_out.tlbld <= itlb_load; i_out.tlbie <= tlbie_req; - i_out.addr <= l_in.addr; - i_out.pte <= l_in.rs; + i_out.addr <= addr; + i_out.pte <= tlb_data; end process; end;