From 91cbeee77cfebe1da3d9484d34b3c72af90d444b Mon Sep 17 00:00:00 2001 From: Paul Mackerras Date: Mon, 13 Jul 2020 12:18:53 +1000 Subject: [PATCH] loadstore1: Generate busy signal earlier This makes the calculation of busy as simple as possible and dependent only on register outputs. The timing of busy is critical, as it gates the valid signal for the next instruction, and therefore any delays in dropping busy at the end of a load or store directly impact the timing of a host of other paths. This also separates the 'done without error' and 'done with error' cases from the MMU into separate signals that are both driven directly from registers. Signed-off-by: Paul Mackerras --- common.vhdl | 1 + loadstore1.vhdl | 92 ++++++++++++++++++++++++++++++------------------- mmu.vhdl | 11 ++++-- 3 files changed, 66 insertions(+), 38 deletions(-) diff --git a/common.vhdl b/common.vhdl index a193df1..28b3434 100644 --- a/common.vhdl +++ b/common.vhdl @@ -315,6 +315,7 @@ package common is type MmuToLoadstore1Type is record done : std_ulogic; + err : std_ulogic; invalid : std_ulogic; badtree : std_ulogic; segerr : std_ulogic; diff --git a/loadstore1.vhdl b/loadstore1.vhdl index 4e1f943..660e6c0 100644 --- a/loadstore1.vhdl +++ b/loadstore1.vhdl @@ -80,6 +80,9 @@ architecture behave of loadstore1 is dsisr : std_ulogic_vector(31 downto 0); instr_fault : std_ulogic; sprval : std_ulogic_vector(63 downto 0); + busy : std_ulogic; + wait_dcache : std_ulogic; + wait_mmu : std_ulogic; end record; type byte_sel_t is array(0 to 7) of std_ulogic; @@ -128,6 +131,9 @@ begin if rising_edge(clk) then if rst = '1' then r.state <= IDLE; + r.busy <= '0'; + r.wait_dcache <= '0'; + r.wait_mmu <= '0'; else r <= rin; end if; @@ -228,8 +234,17 @@ begin -- compute (addr + 8) & ~7 for the second doubleword when unaligned next_addr := std_ulogic_vector(unsigned(r.addr(63 downto 3)) + 1) & "000"; + -- Busy calculation. + -- We need to minimize the delay from clock to busy valid because it + -- gates the start of execution of the next instruction. + busy := r.busy or (r.wait_dcache and not d_in.valid) or (r.wait_mmu and not m_in.done); + done := '0'; + if r.state /= IDLE and busy = '0' then + done := '1'; + end if; exception := '0'; + case r.state is when IDLE => @@ -255,7 +270,6 @@ begin dsisr(63 - 38) := not r.load; -- XXX there is no architected bit for this dsisr(63 - 35) := d_in.cache_paradox; - v.state := IDLE; else -- Look up the translation for TLB miss -- and also for permission error and RC error @@ -279,8 +293,6 @@ begin else -- stores write back rA update in this cycle do_update := r.update; - done := '1'; - v.state := IDLE; end if; end if; end if; @@ -294,53 +306,36 @@ begin byte_sel := r.first_bytes; end if; if m_in.done = '1' then - if m_in.invalid = '0' and m_in.perm_error = '0' and m_in.rc_error = '0' and - m_in.badtree = '0' and m_in.segerr = '0' then - if r.instr_fault = '0' then - -- retry the request now that the MMU has installed a TLB entry - req := '1'; - if r.last_dword = '0' then - v.state := SECOND_REQ; - else - v.state := ACK_WAIT; - end if; + if r.instr_fault = '0' then + -- retry the request now that the MMU has installed a TLB entry + req := '1'; + if r.last_dword = '0' then + v.state := SECOND_REQ; else - -- nothing to do, the icache retries automatically - done := '1'; - v.state := IDLE; + v.state := ACK_WAIT; end if; - else - exception := '1'; - dsisr(63 - 33) := m_in.invalid; - dsisr(63 - 36) := m_in.perm_error; - dsisr(63 - 38) := not r.load; - dsisr(63 - 44) := m_in.badtree; - dsisr(63 - 45) := m_in.rc_error; - v.state := IDLE; end if; end if; + if m_in.err = '1' then + exception := '1'; + dsisr(63 - 33) := m_in.invalid; + dsisr(63 - 36) := m_in.perm_error; + dsisr(63 - 38) := not r.load; + dsisr(63 - 44) := m_in.badtree; + dsisr(63 - 45) := m_in.rc_error; + end if; when TLBIE_WAIT => - if m_in.done = '1' then - -- tlbie is finished - done := '1'; - v.state := IDLE; - end if; when LD_UPDATE => do_update := '1'; - v.state := IDLE; - done := '1'; when SPR_CMPLT => - done := '1'; - v.state := IDLE; end case; - busy := '1'; - if r.state = IDLE or done = '1' then - busy := '0'; + if done = '1' or exception = '1' then + v.state := IDLE; end if; -- Note that l_in.valid is gated with busy inside execute1 @@ -450,6 +445,31 @@ begin end if; end if; + -- Work out whether we'll be busy next cycle + v.busy := '0'; + v.wait_dcache := '0'; + v.wait_mmu := '0'; + case v.state is + when SECOND_REQ => + v.busy := '1'; + when ACK_WAIT => + if v.last_dword = '0' or (v.load = '1' and v.update = '1') then + v.busy := '1'; + else + v.wait_dcache := '1'; + end if; + when MMU_LOOKUP => + if v.instr_fault = '0' then + v.busy := '1'; + else + v.wait_mmu := '1'; + end if; + when TLBIE_WAIT => + v.wait_mmu := '1'; + when others => + -- not busy next cycle + end case; + -- Update outputs to dcache d_out.valid <= req; d_out.load <= v.load; diff --git a/mmu.vhdl b/mmu.vhdl index 6458a6e..09df3ae 100644 --- a/mmu.vhdl +++ b/mmu.vhdl @@ -52,6 +52,7 @@ architecture behave of mmu is -- internal state state : state_t; done : std_ulogic; + err : std_ulogic; pgtbl0 : std_ulogic_vector(63 downto 0); pt0_valid : std_ulogic; pgtbl3 : std_ulogic_vector(63 downto 0); @@ -92,7 +93,10 @@ begin report "MMU got tlb miss for " & to_hstring(rin.addr); end if; if l_out.done = '1' then - report "MMU completing op with invalid=" & std_ulogic'image(l_out.invalid) & + report "MMU completing op without error"; + end if; + if l_out.err = '1' then + report "MMU completing op with err invalid=" & std_ulogic'image(l_out.invalid) & " badtree=" & std_ulogic'image(l_out.badtree); end if; if rin.state = RADIX_LOOKUP then @@ -200,6 +204,7 @@ begin v.valid := '0'; dcreq := '0'; v.done := '0'; + v.err := '0'; v.invalid := '0'; v.badtree := '0'; v.segerror := '0'; @@ -412,7 +417,8 @@ begin end case; if v.state = RADIX_FINISH or (v.state = RADIX_LOAD_TLB and r.iside = '1') then - v.done := '1'; + v.err := v.invalid or v.badtree or v.segerror or v.perm_err or v.rc_error; + v.done := not v.err; end if; if r.addr(63) = '1' then @@ -451,6 +457,7 @@ begin end if; l_out.done <= r.done; + l_out.err <= r.err; l_out.invalid <= r.invalid; l_out.badtree <= r.badtree; l_out.segerr <= r.segerror;